This jupyter notebook uses the CICIoT2023 public dataset (available at https://doi.org/10.3390/s23135941) to explore the use of Ensemble Learning methods as a means to improve predictive accuracy for anomaly detection.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Miscellaneous packages
import time #for calculating elapsed time for training tasks
import os #for checking if file exists
import socket #for getting FQDN of local machine
import math #square root function
import sys
# Packages from scikit-learn
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV #for hyperparameter optimization
from sklearn.model_selection import cross_val_score #for cross fold validation
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.ensemble import BaggingClassifier, VotingClassifier, StackingClassifier, AdaBoostClassifier, GradientBoostingClassifier #Packages for Ensemble Learning
from sklearn.linear_model import LogisticRegression #used by stacking models
from sklearn.tree import DecisionTreeClassifier #used by stacking models
from imblearn.under_sampling import RandomUnderSampler #may need to install with: conda install -c conda-forge imbalanced-learn
from imblearn.over_sampling import SMOTE #may need to install with: conda install -c conda-forge imbalanced-learn
import xgboost as xgb #eXtreme Gradient Booster, not part of sklearn, need to install with: pip install xgboost
# function to show missing values in dataset
def get_type_missing(df):
df_types = pd.DataFrame()
df_types['data_type'] = df.dtypes
df_types['missing_values'] = df.isnull().sum()
return df_types.sort_values(by='missing_values', ascending=False)
# function to create a confusion matrix
def visualize_confusion_matrix(y_test_label, y_pred):
#
## Calculate accuracy
#accuracy = accuracy_score(y_test_label, y_pred)
#print("Accuracy:", accuracy)
#
# Confusion Matrix
cm = confusion_matrix(y_test_label, y_pred)
#
# visualize confusion matrix with more detailed labels
# https://medium.com/@dtuk81/confusion-matrix-visualization-fc31e3f30fea
#
group_names = ['True Negative','False Positive','False Negative','True Positive']
group_counts = ["{0:0.0f}".format(value) for value in cm.flatten()]
group_percentages = ["{0:.2%}".format(value) for value in cm.flatten()/np.sum(cm)]
labels = [f"{v1}\n{v2}\n{v3}" for v1, v2, v3 in zip(group_names,group_counts,group_percentages)]
labels = np.asarray(labels).reshape(2,2)
plt.figure(figsize=(3.5, 2.0)) #default figsize is 6.4" wide x 4.8" tall, shrink to 3.5" wide 2.0" tall
sns.heatmap(cm, annot=labels, fmt='', cmap='Blues', cbar=False)
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.title("Confusion Matrix")
plt.show()
# use the .ravel function to pull out TN,TP,FN,TP
# https://analytics4all.org/2020/05/07/python-confusion-matrix/
TN, FP, FN, TP = cm.ravel()
# calculate different metrics
Accuracy = (( TP + TN) / ( TP + TN + FP + FN))
Sensitivity = TP / (TP + FN)
Specificity = TN / (TN + FP)
GeometricMean = math.sqrt(Sensitivity * Specificity)
# Precision is the ratio of true positive predictions to the total number of positive predictions made by the model
# average=binary for binary classification models, average=micro for multiclass classification, average=weighted to match classification_report
precision = precision_score(y_test_label, y_pred, average='weighted')
# Recall is the ratio of true positive predictions to the total number of actual positive instances in the data.
# average=binary for binary classification models, average=micro for multiclass classification, average=weighted to match classification_report
recall = recall_score(y_test_label, y_pred, average='weighted')
# F1-score is a metric that considers both precision and recall, providing a balance between the two.
# average=binary for binary classification models, average=micro for multiclass classification, average=weighted to match classification_report
f1 = f1_score(y_test_label, y_pred, average='weighted')
# add details below graph to help interpret results
print('\n\n')
print('Confusion matrix\n\n', cm)
print('\nTrue Negatives (TN) = ', TN)
print('False Positives (FP) = ', FP)
print('False Negatives (FN) = ', FN)
print('True Positives (TP) = ', TP)
print ('\n')
print ("Accuracy: ", Accuracy)
print ("Sensitivity: ", Sensitivity)
print ("Specificity: ", Specificity)
print ("Geometric Mean: ", GeometricMean)
print ('\n')
print ("Precision: ", precision)
print ("Recall: ", recall)
print ("f1-score: ", f1)
print('\n------------------------------------------------\n')
# We want TN and TP to be approximately equal, because this indicates the dataset is well balanced.
# If TN and TP are very different, it indicates imbalanced data, which can lead to low accuracy due to overfitting
#if (TN/TP*100 < 40 or TN/TP*100 > 60): #we want TN and TP to be approximately 50%, if the values are below 40% or over 60%, generate a warning
# print("WARNING: the confusion matrix shows that TN and TP are very imbalanced, may lead to low accuracy!")
#
return cm
# function to report on model accuracy (TP, FP, FN, FP), precision, recall, f1-score
def model_classification_report(cm, y_test_label, y_pred):
report = classification_report(y_test_label, y_pred, digits=4)
print('\n')
print("Classification Report: \n", report)
print('\n\n\n')
# function to show elapsed time for running notebook
# start a timer so we can calculate the total runtime of this notebook
notebook_start_time = time.time() #seconds since epoch
def show_elapsed_time():
#
# Get the current time as a struct_time object
current_time_struct = time.localtime()
# Format the struct_time as a string (yyyy-mm-dd HH:MM:SS format)
current_time_str = time.strftime("%Y-%m-%d %H:%M:%S", current_time_struct)
# Display the current time in HH:MM:SS format
print("Current Time:", current_time_str)
# show a running total of elapsed time for the entire notebook
notebook_end_time = time.time() #seconds since epoch
print(f"The entire notebook runtime so far is {(notebook_end_time-notebook_start_time)/60:.0f} minutes")
show_elapsed_time()
Current Time: 2024-01-03 10:04:50 The entire notebook runtime so far is 0 minutes
# initialize variables to avoid undef errors
accuracy_lr_undersampled_unoptimized = 0
accuracy_lr_undersampled_optimized = 0
accuracy_dt_undersampled_unoptimized = 0
accuracy_dt_undersampled_optimized = 0
accuracy_ds_undersampled_unoptimized = 0
accuracy_ds_undersampled_optimized = 0
accuracy_rf_undersampled_unoptimized = 0
accuracy_rf_undersampled_optimized = 0
accuracy_nb_undersampled_unoptimized = 0
accuracy_nb_undersampled_optimized = 0
accuracy_svm_undersampled_unoptimized = 0
accuracy_svm_undersampled_optimized = 0
accuracy_knn_undersampled_unoptimized = 0
accuracy_knn_undersampled_optimized = 0
accuracy_mlp_undersampled_unoptimized = 0
accuracy_mlp_undersampled_optimized = 0
accuracy_gb_undersampled_unoptimized = 0
accuracy_gb_undersampled_optimized = 0
accuracy_xgb_undersampled_unoptimized = 0
accuracy_xgb_undersampled_optimized = 0
best_params_lr = ""
best_params_dt = ""
best_params_ds = ""
best_params_rf = ""
best_params_nb = ""
best_params_svm = ""
best_params_knn = ""
best_params_mlp = ""
best_params_gb = ""
best_params_xgb = ""
accuracy_ensemble_voting = 0
accuracy_ensemble_stacking = 0
accuracy_ensemble_boosting = 0
accuracy_ensemble_bagging = 0
cv_count = 10 #number of cross-validation folds
# start a timer so we can calculate the total runtime of this notebook
notebook_start_time = time.time() #seconds since epoch
# define CSV source file
filename = 'merged_filtered.csv'
LAN_location = 'http://datasets.nyx.local:80/datasets/CIC_IOT_Dataset2023/csv' #high speed local copy on LAN
WAN_location = 'http://datasets.nyx.ca:8081/datasets/CIC_IOT_Dataset2023/csv' #accessible to entire internet
# Get the FQDN of the local machine
fqdn = socket.getfqdn()
ipv4_address = socket.gethostbyname(socket.gethostname())
print(f"Fully Qualified Domain Name (FQDN):{fqdn}, IPv4 address:{ipv4_address}")
if ( "nyx.local" in fqdn ):
# If inside the LAN, grab the local copy of the dataset
print(f"Detected Fully Qualified Domain Name of {fqdn}, dataset source is:\n{LAN_location}/{filename}")
dataset = f"{LAN_location}/{filename}"
else:
# If not inside the LAN, grab the dataset from an internet-accessible URL
print(f"Detected Fully Qualified Domain Name of {fqdn}, dataset source is:\n{WAN_location}/{filename}")
dataset = f"{WAN_location}/{filename}"
print(f"Loading dataset from {dataset}")
df = pd.read_csv(dataset)
Fully Qualified Domain Name (FQDN):DESKTOP-SNBGTFL.nyx.local, IPv4 address:192.168.14.136 Detected Fully Qualified Domain Name of DESKTOP-SNBGTFL.nyx.local, dataset source is: http://datasets.nyx.local:80/datasets/CIC_IOT_Dataset2023/csv/merged_filtered.csv Loading dataset from http://datasets.nyx.local:80/datasets/CIC_IOT_Dataset2023/csv/merged_filtered.csv
#view dimensions of dataset (rows and columns)
print ("Rows,columns in dataset:", df.shape)
Rows,columns in dataset: (2867733, 47)
print(f"Dropping rows from the dataset during debugging to speed up this notebook - turn this off when finished debugging!")
# cut dataset in half if > 2 million rows
if ( len(df) > 2000000):
print(f"Original size of dataset is", len(df), " rows")
df.drop(df.index[::2], inplace=True)
print(f"Dataset size after dropping all the even-numbered rows is", len(df), " rows")
# cut dataset in half if > 1 million rows
if ( len(df) > 1000000):
print(f"Original size of dataset is", len(df), " rows")
df.drop(df.index[::2], inplace=True)
print(f"Dataset size after dropping all the even-numbered rows is", len(df), " rows")
# cut dataset in half if > 0.5 million rows
if ( len(df) > 500000):
print(f"Original size of dataset is", len(df), " rows")
df.drop(df.index[::2], inplace=True)
print(f"Dataset size after dropping all the even-numbered rows is", len(df), " rows")
# cut dataset in half if > 0.5 million rows
if ( len(df) > 500000):
print(f"Original size of dataset is", len(df), " rows")
df.drop(df.index[::2], inplace=True)
print(f"Dataset size after dropping all the even-numbered rows is", len(df), " rows")
# cut dataset in half if > 250,000 rows
if ( len(df) > 250000):
print(f"Original size of dataset is", len(df), " rows")
df.drop(df.index[::2], inplace=True)
print(f"Dataset size after dropping all the even-numbered rows is", len(df), " rows")
# notebook runtime ~45 minutes with this many rows
# cut dataset in half if > 100,000 rows
if ( len(df) > 100000):
print(f"Original size of dataset is", len(df), " rows")
df.drop(df.index[::2], inplace=True)
print(f"Dataset size after dropping all the even-numbered rows is", len(df), " rows")
# notebook runtime ~13 minutes with this many rows
# cut dataset in half if > 50,000 rows
if ( len(df) > 50000):
print(f"Original size of dataset is", len(df), " rows")
df.drop(df.index[::2], inplace=True)
print(f"Dataset size after dropping all the even-numbered rows is", len(df), " rows")
# notebook runtime ~5 minutes with this many rows
# cut dataset in half if > 25,000 rows
if ( len(df) > 25000):
print(f"Original size of dataset is", len(df), " rows")
df.drop(df.index[::2], inplace=True)
print(f"Dataset size after dropping all the even-numbered rows is", len(df), " rows")
Dropping rows from the dataset during debugging to speed up this notebook - turn this off when finished debugging! Original size of dataset is 2867733 rows Dataset size after dropping all the even-numbered rows is 1433866 rows Original size of dataset is 1433866 rows Dataset size after dropping all the even-numbered rows is 716933 rows Original size of dataset is 716933 rows Dataset size after dropping all the even-numbered rows is 358466 rows Original size of dataset is 358466 rows Dataset size after dropping all the even-numbered rows is 179233 rows Original size of dataset is 179233 rows Dataset size after dropping all the even-numbered rows is 89616 rows Original size of dataset is 89616 rows Dataset size after dropping all the even-numbered rows is 44808 rows Original size of dataset is 44808 rows Dataset size after dropping all the even-numbered rows is 22404 rows
#view dimensions of dataset (rows and columns)
print ("Rows,columns in dataset:", df.shape)
Rows,columns in dataset: (22404, 47)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Current Time: 2024-01-03 10:06:09 The entire notebook runtime so far is 1 minutes
# take a quick look at the data
df.head()
| flow_duration | Header_Length | Protocol Type | Duration | Rate | Srate | Drate | fin_flag_number | syn_flag_number | rst_flag_number | ... | Std | Tot size | IAT | Number | Magnitue | Radius | Covariance | Variance | Weight | label | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 127 | 0.000000 | 58.0 | 6.0 | 48.7 | 0.133304 | 0.133304 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 36.280658 | 58.0 | 1.664290e+08 | 13.5 | 11.643890 | 51.419103 | 1.331690e+03 | 1.0 | 244.6 | Recon-PortScan |
| 255 | 5.593900 | 5411305.3 | 6.0 | 60.1 | 664.243267 | 664.243267 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 399.101608 | 1085.6 | 6.970882e-04 | 5.5 | 24.248639 | 564.414907 | 3.430353e+05 | 0.5 | 38.5 | MITM-ArpSpoofing |
| 383 | 35.645644 | 18828.9 | 11.5 | 69.1 | 1.658343 | 1.658343 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 9.909081 | 71.2 | 1.808000e-02 | 5.5 | 11.558680 | 14.013556 | 1.669243e+02 | 0.8 | 38.5 | BenignTraffic |
| 511 | 11.567025 | 660581.8 | 6.0 | 114.1 | 139.341360 | 139.341360 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 1168.350480 | 1426.7 | 1.665203e+08 | 13.5 | 47.582073 | 1653.261146 | 1.368075e+06 | 1.0 | 244.6 | BenignTraffic |
| 639 | 0.763520 | 462.3 | 5.3 | 44.8 | 6.108789 | 6.108789 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 9.848105 | 78.8 | 1.668999e-02 | 5.5 | 11.667452 | 13.927324 | 3.491656e+02 | 0.7 | 38.5 | BenignTraffic |
5 rows × 47 columns
# Display all the data rather than just a portion
#pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)
# check for any missing values in dataset
df.isna().sum()
flow_duration 0 Header_Length 0 Protocol Type 0 Duration 0 Rate 0 Srate 0 Drate 0 fin_flag_number 0 syn_flag_number 0 rst_flag_number 0 psh_flag_number 0 ack_flag_number 0 ece_flag_number 0 cwr_flag_number 0 ack_count 0 syn_count 0 fin_count 0 urg_count 0 rst_count 0 HTTP 0 HTTPS 0 DNS 0 Telnet 0 SMTP 0 SSH 0 IRC 0 TCP 0 UDP 0 DHCP 0 ARP 0 ICMP 0 IPv 0 LLC 0 Tot sum 0 Min 0 Max 0 AVG 0 Std 0 Tot size 0 IAT 0 Number 0 Magnitue 0 Radius 0 Covariance 0 Variance 0 Weight 0 label 0 dtype: int64
# check for any missing datatypes
get_type_missing(df)
| data_type | missing_values | |
|---|---|---|
| flow_duration | float64 | 0 |
| Max | float64 | 0 |
| TCP | float64 | 0 |
| UDP | float64 | 0 |
| DHCP | float64 | 0 |
| ARP | float64 | 0 |
| ICMP | float64 | 0 |
| IPv | float64 | 0 |
| LLC | float64 | 0 |
| Tot sum | float64 | 0 |
| Min | float64 | 0 |
| AVG | float64 | 0 |
| SSH | float64 | 0 |
| Std | float64 | 0 |
| Tot size | float64 | 0 |
| IAT | float64 | 0 |
| Number | float64 | 0 |
| Magnitue | float64 | 0 |
| Radius | float64 | 0 |
| Covariance | float64 | 0 |
| Variance | float64 | 0 |
| Weight | float64 | 0 |
| IRC | float64 | 0 |
| SMTP | float64 | 0 |
| Header_Length | float64 | 0 |
| ack_flag_number | float64 | 0 |
| Protocol Type | float64 | 0 |
| Duration | float64 | 0 |
| Rate | float64 | 0 |
| Srate | float64 | 0 |
| Drate | float64 | 0 |
| fin_flag_number | float64 | 0 |
| syn_flag_number | float64 | 0 |
| rst_flag_number | float64 | 0 |
| psh_flag_number | float64 | 0 |
| ece_flag_number | float64 | 0 |
| Telnet | float64 | 0 |
| cwr_flag_number | float64 | 0 |
| ack_count | float64 | 0 |
| syn_count | float64 | 0 |
| fin_count | float64 | 0 |
| urg_count | float64 | 0 |
| rst_count | float64 | 0 |
| HTTP | float64 | 0 |
| HTTPS | float64 | 0 |
| DNS | float64 | 0 |
| label | object | 0 |
df.describe()
| flow_duration | Header_Length | Protocol Type | Duration | Rate | Srate | Drate | fin_flag_number | syn_flag_number | rst_flag_number | ... | AVG | Std | Tot size | IAT | Number | Magnitue | Radius | Covariance | Variance | Weight | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 22404.000000 | 2.240400e+04 | 22404.000000 | 22404.000000 | 2.240400e+04 | 2.240400e+04 | 22404.0 | 22404.000000 | 22404.000000 | 22404.000000 | ... | 22404.000000 | 22404.000000 | 22404.000000 | 2.240400e+04 | 22404.000000 | 22404.000000 | 22404.000000 | 2.240400e+04 | 22404.000000 | 22404.000000 |
| mean | 77.659999 | 1.141013e+06 | 10.742757 | 93.729650 | 2.202559e+03 | 2.202559e+03 | 0.0 | 0.000134 | 0.018434 | 0.012899 | ... | 556.928546 | 290.640687 | 557.775289 | 8.290062e+07 | 9.469998 | 29.564942 | 410.627969 | 3.433723e+05 | 0.605072 | 140.783107 |
| std | 857.470456 | 1.495270e+06 | 4.813685 | 45.812264 | 1.878298e+04 | 1.878298e+04 | 0.0 | 0.011571 | 0.134518 | 0.112843 | ... | 556.456304 | 468.571432 | 566.148160 | 6.842543e+07 | 3.287422 | 14.864468 | 663.019540 | 1.117793e+06 | 0.419716 | 84.642447 |
| min | 0.000000 | 0.000000e+00 | 0.000000 | 0.000000 | 0.000000e+00 | 0.000000e+00 | 0.0 | 0.000000 | 0.000000 | 0.000000 | ... | 46.000000 | 0.000000 | 46.000000 | 1.137073e-06 | 1.000000 | 9.591663 | 0.000000 | 0.000000e+00 | 0.000000 | 1.000000 |
| 25% | 1.633845 | 6.509265e+04 | 6.000000 | 64.000000 | 3.225728e+01 | 3.225728e+01 | 0.0 | 0.000000 | 0.000000 | 0.000000 | ... | 121.912944 | 5.423429 | 115.000000 | 1.182063e-02 | 5.500000 | 15.447983 | 7.480793 | 2.178834e+02 | 0.110000 | 38.500000 |
| 50% | 6.231715 | 5.928908e+05 | 8.300000 | 69.400000 | 8.856275e+01 | 8.856275e+01 | 0.0 | 0.000000 | 0.000000 | 0.000000 | ... | 542.957501 | 54.212345 | 544.120000 | 8.376743e+07 | 9.500000 | 32.877066 | 76.261703 | 4.890330e+03 | 0.900000 | 141.550000 |
| 75% | 37.302472 | 1.618047e+06 | 16.830000 | 110.700000 | 1.093230e+03 | 1.093230e+03 | 0.0 | 0.000000 | 0.000000 | 0.000000 | ... | 554.000000 | 457.264132 | 554.000000 | 1.665190e+08 | 13.500000 | 33.286634 | 645.699021 | 2.771234e+05 | 1.000000 | 244.600000 |
| max | 47526.775773 | 9.788621e+06 | 17.000000 | 255.000000 | 1.048612e+06 | 1.048612e+06 | 0.0 | 1.000000 | 1.000000 | 1.000000 | ... | 5623.555913 | 5851.521943 | 5572.000000 | 1.676394e+08 | 14.500000 | 104.307547 | 8275.301693 | 3.931555e+07 | 1.000000 | 244.600000 |
8 rows × 46 columns
# look at all the datatypes of that are objects, in case any can be converted to integers
df.describe(include='object')
| label | |
|---|---|
| count | 22404 |
| unique | 16 |
| top | BenignTraffic |
| freq | 8544 |
# look at the values in all of the features
feature_names = df.columns.tolist()
for feature_name in feature_names:
if feature_name in df.columns:
print('\n')
print(f"------------------")
print(f"{feature_name}")
print(f"------------------")
print(df[feature_name].value_counts())
------------------
flow_duration
------------------
0.000000 148
1.733584 1
14.110007 1
754.793418 1
29.564217 1
...
4.016203 1
0.342118 1
26.838830 1
40.190514 1
0.031879 1
Name: flow_duration, Length: 22257, dtype: int64
------------------
Header_Length
------------------
58.0 112
112.0 79
77.2 16
71.4 13
89.2 13
...
298.9 1
1366020.7 1
455177.9 1
2774275.0 1
103235.2 1
Name: Header_Length, Length: 21652, dtype: int64
------------------
Protocol Type
------------------
6.00 6117
17.00 4635
7.10 1866
8.20 999
16.83 640
...
11.45 1
7.53 1
12.25 1
13.88 1
6.53 1
Name: Protocol Type, Length: 489, dtype: int64
------------------
Duration
------------------
64.00 5416
63.36 623
65.91 358
63.60 242
93.50 170
...
244.90 1
218.40 1
87.19 1
22.40 1
58.28 1
Name: Duration, Length: 2430, dtype: int64
------------------
Rate
------------------
26.997670 3
26.999495 2
0.092480 2
155.229608 2
29.158700 2
..
176.297616 1
57.381313 1
736.212412 1
4239.349239 1
3625.050324 1
Name: Rate, Length: 22398, dtype: int64
------------------
Srate
------------------
26.997670 3
26.999495 2
0.092480 2
155.229608 2
29.158700 2
..
176.297616 1
57.381313 1
736.212412 1
4239.349239 1
3625.050324 1
Name: Srate, Length: 22398, dtype: int64
------------------
Drate
------------------
0.0 22404
Name: Drate, dtype: int64
------------------
fin_flag_number
------------------
0.0 22401
1.0 3
Name: fin_flag_number, dtype: int64
------------------
syn_flag_number
------------------
0.0 21991
1.0 413
Name: syn_flag_number, dtype: int64
------------------
rst_flag_number
------------------
0.0 22115
1.0 289
Name: rst_flag_number, dtype: int64
------------------
psh_flag_number
------------------
0.0 21871
1.0 533
Name: psh_flag_number, dtype: int64
------------------
ack_flag_number
------------------
1.0 11428
0.0 10976
Name: ack_flag_number, dtype: int64
------------------
ece_flag_number
------------------
0.0 22403
1.0 1
Name: ece_flag_number, dtype: int64
------------------
cwr_flag_number
------------------
0.0 22404
Name: cwr_flag_number, dtype: int64
------------------
ack_count
------------------
0.00 19911
0.20 556
0.10 450
0.40 277
0.50 221
...
3.80 1
0.34 1
0.42 1
0.38 1
2.70 1
Name: ack_count, Length: 61, dtype: int64
------------------
syn_count
------------------
0.000000 12018
2.000000 1327
1.800000 899
1.000000 792
0.200000 736
...
0.030769 1
3.700000 1
3.300000 1
1.520000 1
1.050000 1
Name: syn_count, Length: 181, dtype: int64
------------------
fin_count
------------------
0.00 21116
1.00 179
0.10 152
0.80 135
0.90 129
0.70 125
0.30 94
0.60 86
0.20 85
0.01 71
0.40 67
0.50 65
0.03 18
0.02 17
1.20 7
0.06 7
0.04 6
1.50 6
0.07 5
0.09 3
0.41 2
0.11 2
1.30 2
0.42 2
0.37 2
0.15 2
0.36 1
0.14 1
0.08 1
0.33 1
0.16 1
0.99 1
0.31 1
1.10 1
0.23 1
0.05 1
4.60 1
1.77 1
0.24 1
0.18 1
0.28 1
1.80 1
0.35 1
0.21 1
3.00 1
Name: fin_count, dtype: int64
------------------
urg_count
------------------
0.00 7649
0.01 252
0.02 130
0.03 92
0.20 87
...
371.60 1
438.50 1
415.30 1
13.68 1
1.44 1
Name: urg_count, Length: 4208, dtype: int64
------------------
rst_count
------------------
0.00 6407
0.01 365
0.02 247
1.00 199
0.03 160
...
1272.10 1
1685.40 1
79.80 1
1179.90 1
115.60 1
Name: rst_count, Length: 9278, dtype: int64
------------------
HTTP
------------------
0.0 21778
1.0 626
Name: HTTP, dtype: int64
------------------
HTTPS
------------------
0.0 13640
1.0 8764
Name: HTTPS, dtype: int64
------------------
DNS
------------------
0.0 22366
1.0 38
Name: DNS, dtype: int64
------------------
Telnet
------------------
0.0 22404
Name: Telnet, dtype: int64
------------------
SMTP
------------------
0.0 22404
Name: SMTP, dtype: int64
------------------
SSH
------------------
0.0 22388
1.0 16
Name: SSH, dtype: int64
------------------
IRC
------------------
0.0 22404
Name: IRC, dtype: int64
------------------
TCP
------------------
1.0 12255
0.0 10149
Name: TCP, dtype: int64
------------------
UDP
------------------
0.0 13445
1.0 8959
Name: UDP, dtype: int64
------------------
DHCP
------------------
0.0 22404
Name: DHCP, dtype: int64
------------------
ARP
------------------
0.0 22385
1.0 19
Name: ARP, dtype: int64
------------------
ICMP
------------------
0.0 22402
1.0 2
Name: ICMP, dtype: int64
------------------
IPv
------------------
1.0 22363
0.0 41
Name: IPv, dtype: int64
------------------
LLC
------------------
1.0 22363
0.0 41
Name: LLC, dtype: int64
------------------
Tot sum
------------------
5817.00 3421
363.00 355
1023.00 81
319.00 58
297.00 54
...
1495.60 1
5654.83 1
5764.75 1
1033.40 1
5866.20 1
Name: Tot sum, Length: 14266, dtype: int64
------------------
Min
------------------
66.00 3901
554.00 3571
54.00 1660
50.00 838
60.00 807
...
445.26 1
393.97 1
455.86 1
76.70 1
492.43 1
Name: Min, Length: 3808, dtype: int64
------------------
Max
------------------
554.00 6180
1514.00 789
2962.00 787
66.00 535
230.00 381
...
269.91 1
1202.00 1
1121.80 1
288.63 1
2023.20 1
Name: Max, Length: 5551, dtype: int64
------------------
AVG
------------------
554.000000 3451
66.000000 435
58.000000 106
54.000000 81
94.000000 81
...
206.320317 1
169.633219 1
95.656508 1
94.906071 1
960.904921 1
Name: AVG, Length: 16687, dtype: int64
------------------
Std
------------------
0.000000 4322
29.427518 56
26.957518 32
18.672668 31
6.901687 28
...
28.973711 1
1395.142549 1
1276.656207 1
611.081707 1
713.198385 1
Name: Std, Length: 16493, dtype: int64
------------------
Tot size
------------------
554.00 3453
66.00 692
549.06 503
1514.00 154
58.00 149
...
122.44 1
1531.20 1
1110.00 1
1807.80 1
3412.00 1
Name: Tot size, Length: 6831, dtype: int64
------------------
IAT
------------------
3.600121e-06 24
3.004074e-06 14
3.695488e-06 13
3.290176e-06 11
3.099442e-06 9
..
1.665249e+08 1
1.665242e+08 1
8.193612e-03 1
8.371153e+07 1
7.369518e-05 1
Name: IAT, Length: 22094, dtype: int64
------------------
Number
------------------
5.500000 7617
13.500000 7473
9.500000 7264
3.000000 4
8.500000 3
9.595960 3
1.500000 3
5.000000 3
3.500000 2
8.777778 2
9.055556 2
7.000000 2
8.857143 2
8.074074 2
8.863636 2
14.500000 2
9.054795 1
9.743590 1
8.071429 1
8.333333 1
1.000000 1
9.214286 1
8.976744 1
14.000000 1
8.925373 1
9.000000 1
11.000000 1
8.928571 1
9.057471 1
9.121622 1
4.500000 1
9.202128 1
9.142857 1
8.521739 1
Name: Number, dtype: int64
------------------
Magnitue
------------------
33.286634 3524
11.489125 444
10.770330 106
10.392305 83
13.711309 82
...
38.892103 1
31.310594 1
53.704679 1
23.215809 1
42.654666 1
Name: Magnitue, Length: 16595, dtype: int64
------------------
Radius
------------------
0.000000 4424
41.654185 63
9.797849 29
15.389575 29
11.597075 27
...
361.699022 1
40.975015 1
1973.029514 1
1805.464522 1
1008.614829 1
Name: Radius, Length: 16363, dtype: int64
------------------
Covariance
------------------
0.000000 4424
4890.329916 63
2550.973705 34
4280.239916 33
1322.901483 29
...
1947.189340 1
1098.446242 1
127583.133524 1
903.793550 1
581023.256350 1
Name: Covariance, Length: 16377, dtype: int64
------------------
Variance
------------------
1.000000 6970
0.900000 4456
0.000000 4424
0.800000 1000
0.700000 562
...
0.888889 1
0.445946 1
0.670000 1
0.760000 1
0.833333 1
Name: Variance, Length: 99, dtype: int64
------------------
Weight
------------------
38.500000 7617
244.600000 7473
141.550000 7262
11.000000 4
93.500000 3
139.333333 3
2.500000 3
31.666667 3
15.166667 2
127.044444 2
63.000000 2
117.166667 2
116.314286 2
110.037037 2
129.363636 2
215.500000 2
121.000000 1
135.923077 1
108.392857 1
119.208333 1
1.000000 1
127.821429 1
120.194444 1
132.000000 1
200.000000 1
130.100000 1
131.507692 1
127.561644 1
126.828571 1
128.486486 1
131.770115 1
128.850746 1
25.500000 1
131.265957 1
135.031746 1
123.695652 1
Name: Weight, dtype: int64
------------------
label
------------------
BenignTraffic 8544
Mirai-udpplain 7006
MITM-ArpSpoofing 2400
DNS_Spoofing 1423
Recon-HostDiscovery 1025
Recon-OSScan 791
Recon-PortScan 601
VulnerabilityScan 296
DictionaryBruteForce 103
CommandInjection 46
BrowserHijacking 43
SqlInjection 43
XSS 30
Backdoor_Malware 25
Recon-PingSweep 17
Uploading_Attack 11
Name: label, dtype: int64
#view dimensions of dataset (rows and columns)
print ("Rows,columns in dataset:", df.shape)
Rows,columns in dataset: (22404, 47)
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 22404 entries, 127 to 2867711 Data columns (total 47 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 flow_duration 22404 non-null float64 1 Header_Length 22404 non-null float64 2 Protocol Type 22404 non-null float64 3 Duration 22404 non-null float64 4 Rate 22404 non-null float64 5 Srate 22404 non-null float64 6 Drate 22404 non-null float64 7 fin_flag_number 22404 non-null float64 8 syn_flag_number 22404 non-null float64 9 rst_flag_number 22404 non-null float64 10 psh_flag_number 22404 non-null float64 11 ack_flag_number 22404 non-null float64 12 ece_flag_number 22404 non-null float64 13 cwr_flag_number 22404 non-null float64 14 ack_count 22404 non-null float64 15 syn_count 22404 non-null float64 16 fin_count 22404 non-null float64 17 urg_count 22404 non-null float64 18 rst_count 22404 non-null float64 19 HTTP 22404 non-null float64 20 HTTPS 22404 non-null float64 21 DNS 22404 non-null float64 22 Telnet 22404 non-null float64 23 SMTP 22404 non-null float64 24 SSH 22404 non-null float64 25 IRC 22404 non-null float64 26 TCP 22404 non-null float64 27 UDP 22404 non-null float64 28 DHCP 22404 non-null float64 29 ARP 22404 non-null float64 30 ICMP 22404 non-null float64 31 IPv 22404 non-null float64 32 LLC 22404 non-null float64 33 Tot sum 22404 non-null float64 34 Min 22404 non-null float64 35 Max 22404 non-null float64 36 AVG 22404 non-null float64 37 Std 22404 non-null float64 38 Tot size 22404 non-null float64 39 IAT 22404 non-null float64 40 Number 22404 non-null float64 41 Magnitue 22404 non-null float64 42 Radius 22404 non-null float64 43 Covariance 22404 non-null float64 44 Variance 22404 non-null float64 45 Weight 22404 non-null float64 46 label 22404 non-null object dtypes: float64(46), object(1) memory usage: 8.0+ MB
# look at the names of all the columns
df.columns
Index(['flow_duration', 'Header_Length', 'Protocol Type', 'Duration', 'Rate',
'Srate', 'Drate', 'fin_flag_number', 'syn_flag_number',
'rst_flag_number', 'psh_flag_number', 'ack_flag_number',
'ece_flag_number', 'cwr_flag_number', 'ack_count', 'syn_count',
'fin_count', 'urg_count', 'rst_count', 'HTTP', 'HTTPS', 'DNS', 'Telnet',
'SMTP', 'SSH', 'IRC', 'TCP', 'UDP', 'DHCP', 'ARP', 'ICMP', 'IPv', 'LLC',
'Tot sum', 'Min', 'Max', 'AVG', 'Std', 'Tot size', 'IAT', 'Number',
'Magnitue', 'Radius', 'Covariance', 'Variance', 'Weight', 'label'],
dtype='object')
# Rename the column "label" to "Attack_type"
# the final column contains the text-based labels for the data (1=normal -1=attack)
# the column name is this ugly value: Attack LABLE (1:No Attack, -1:Attack)
# let's rename that column to: LABEL
if 'label' in df.columns:
df.rename(columns={'label' : 'Attack_type' }, inplace=True)
# This feature has an embedded space, let's replace that with an underscore
if 'Protocol Type' in df.columns:
df.rename(columns={'Protocol Type' : 'Protocol_Type' }, inplace=True)
# This feature has an embedded space, let's replace that with an underscore
if 'Tot sum' in df.columns:
df.rename(columns={'Tot sum' : 'Tot_sum' }, inplace=True)
# confirm the column was renamed from "label" to "Attack_type"
df.head()
| flow_duration | Header_Length | Protocol_Type | Duration | Rate | Srate | Drate | fin_flag_number | syn_flag_number | rst_flag_number | ... | Std | Tot size | IAT | Number | Magnitue | Radius | Covariance | Variance | Weight | Attack_type | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 127 | 0.000000 | 58.0 | 6.0 | 48.7 | 0.133304 | 0.133304 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 36.280658 | 58.0 | 1.664290e+08 | 13.5 | 11.643890 | 51.419103 | 1.331690e+03 | 1.0 | 244.6 | Recon-PortScan |
| 255 | 5.593900 | 5411305.3 | 6.0 | 60.1 | 664.243267 | 664.243267 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 399.101608 | 1085.6 | 6.970882e-04 | 5.5 | 24.248639 | 564.414907 | 3.430353e+05 | 0.5 | 38.5 | MITM-ArpSpoofing |
| 383 | 35.645644 | 18828.9 | 11.5 | 69.1 | 1.658343 | 1.658343 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 9.909081 | 71.2 | 1.808000e-02 | 5.5 | 11.558680 | 14.013556 | 1.669243e+02 | 0.8 | 38.5 | BenignTraffic |
| 511 | 11.567025 | 660581.8 | 6.0 | 114.1 | 139.341360 | 139.341360 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 1168.350480 | 1426.7 | 1.665203e+08 | 13.5 | 47.582073 | 1653.261146 | 1.368075e+06 | 1.0 | 244.6 | BenignTraffic |
| 639 | 0.763520 | 462.3 | 5.3 | 44.8 | 6.108789 | 6.108789 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 9.848105 | 78.8 | 1.668999e-02 | 5.5 | 11.667452 | 13.927324 | 3.491656e+02 | 0.7 | 38.5 | BenignTraffic |
5 rows × 47 columns
# The final column in the dataset is Attack_type, and will contain one of these values:
# Display unique values in the "Attack_type" column
unique_attack_types = df['Attack_type'].unique()
print("Unique Attack Types:")
print(unique_attack_types)
Unique Attack Types: ['Recon-PortScan' 'MITM-ArpSpoofing' 'BenignTraffic' 'VulnerabilityScan' 'Mirai-udpplain' 'BrowserHijacking' 'Backdoor_Malware' 'DNS_Spoofing' 'Recon-HostDiscovery' 'Recon-OSScan' 'DictionaryBruteForce' 'SqlInjection' 'CommandInjection' 'XSS' 'Uploading_Attack' 'Recon-PingSweep']
# Let's change the text "BenignTraffic" in the Attack_type column to "Normal"
df['Attack_type'] = df['Attack_type'].replace('BenignTraffic', 'Normal')
# Add a column called "Attack_label", which will be 0 if the Attack_type is BenignTraffic, and 1 if anything else
# This column will be used to separate the data into 2 classes (normal + abnormal)
# Add a new column 'Label' where Normal is 0, anything else is 1
df['Attack_label'] = df['Attack_type'].apply(lambda x: 0 if x == 'Normal' else 1)
# confirm there is a new column called "Attack_label" which contains 0 for BenignTraffic, 1 for any type of attack
df.head()
| flow_duration | Header_Length | Protocol_Type | Duration | Rate | Srate | Drate | fin_flag_number | syn_flag_number | rst_flag_number | ... | Tot size | IAT | Number | Magnitue | Radius | Covariance | Variance | Weight | Attack_type | Attack_label | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 127 | 0.000000 | 58.0 | 6.0 | 48.7 | 0.133304 | 0.133304 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 58.0 | 1.664290e+08 | 13.5 | 11.643890 | 51.419103 | 1.331690e+03 | 1.0 | 244.6 | Recon-PortScan | 1 |
| 255 | 5.593900 | 5411305.3 | 6.0 | 60.1 | 664.243267 | 664.243267 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 1085.6 | 6.970882e-04 | 5.5 | 24.248639 | 564.414907 | 3.430353e+05 | 0.5 | 38.5 | MITM-ArpSpoofing | 1 |
| 383 | 35.645644 | 18828.9 | 11.5 | 69.1 | 1.658343 | 1.658343 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 71.2 | 1.808000e-02 | 5.5 | 11.558680 | 14.013556 | 1.669243e+02 | 0.8 | 38.5 | Normal | 0 |
| 511 | 11.567025 | 660581.8 | 6.0 | 114.1 | 139.341360 | 139.341360 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 1426.7 | 1.665203e+08 | 13.5 | 47.582073 | 1653.261146 | 1.368075e+06 | 1.0 | 244.6 | Normal | 0 |
| 639 | 0.763520 | 462.3 | 5.3 | 44.8 | 6.108789 | 6.108789 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 78.8 | 1.668999e-02 | 5.5 | 11.667452 | 13.927324 | 3.491656e+02 | 0.7 | 38.5 | Normal | 0 |
5 rows × 48 columns
# how many 0 (normal) and 1 (attack) values do we have?
df['Attack_label'].value_counts()
1 13860 0 8544 Name: Attack_label, dtype: int64
plt.figure(figsize=(15, 6))
sns.countplot(data=df, x='Attack_label', hue='Attack_type', edgecolor='black', linewidth=1)
plt.title('Attack Label vs Attack Type', fontsize=20)
plt.show()
import plotly.express as px
fig = px.pie(df, names='Attack_label', title='Distribution of Attack Labels')
fig.show()
fig = px.pie(df, names='Attack_type', title='Distribution of Attack Type')
fig.show()
Now using our domain knowledge we will only select useful features from our dataset and drop the rest
#view dimensions of dataset (rows and columns)
print ("Rows,columns in dataset:", df.shape)
Rows,columns in dataset: (22404, 48)
# Identifying columns that are entirely NaN (empty) or have all zero values
empty_or_zero_columns = df.columns[(df.isnull().all())
| (df == 0).all() | (df == 1).all() | (df == 1.0).all()
| (df == 0.0).all() | (df == 2).all() | (df == 2.0).all()]
# Displaying the identified columns
empty_features = empty_or_zero_columns.tolist()
print("These columns are all empty features:")
print(empty_features)
for feature in empty_features:
if feature in df.columns:
df.drop(feature, axis=1, inplace=True)
print("Dropping empty feature:", feature)
These columns are all empty features: ['Drate', 'cwr_flag_number', 'Telnet', 'SMTP', 'IRC', 'DHCP'] Dropping empty feature: Drate Dropping empty feature: cwr_flag_number Dropping empty feature: Telnet Dropping empty feature: SMTP Dropping empty feature: IRC Dropping empty feature: DHCP
# show the columns to confirm the features have been dropped
df.head()
| flow_duration | Header_Length | Protocol_Type | Duration | Rate | Srate | fin_flag_number | syn_flag_number | rst_flag_number | psh_flag_number | ... | Tot size | IAT | Number | Magnitue | Radius | Covariance | Variance | Weight | Attack_type | Attack_label | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 127 | 0.000000 | 58.0 | 6.0 | 48.7 | 0.133304 | 0.133304 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 58.0 | 1.664290e+08 | 13.5 | 11.643890 | 51.419103 | 1.331690e+03 | 1.0 | 244.6 | Recon-PortScan | 1 |
| 255 | 5.593900 | 5411305.3 | 6.0 | 60.1 | 664.243267 | 664.243267 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 1085.6 | 6.970882e-04 | 5.5 | 24.248639 | 564.414907 | 3.430353e+05 | 0.5 | 38.5 | MITM-ArpSpoofing | 1 |
| 383 | 35.645644 | 18828.9 | 11.5 | 69.1 | 1.658343 | 1.658343 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 71.2 | 1.808000e-02 | 5.5 | 11.558680 | 14.013556 | 1.669243e+02 | 0.8 | 38.5 | Normal | 0 |
| 511 | 11.567025 | 660581.8 | 6.0 | 114.1 | 139.341360 | 139.341360 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 1426.7 | 1.665203e+08 | 13.5 | 47.582073 | 1653.261146 | 1.368075e+06 | 1.0 | 244.6 | Normal | 0 |
| 639 | 0.763520 | 462.3 | 5.3 | 44.8 | 6.108789 | 6.108789 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 78.8 | 1.668999e-02 | 5.5 | 11.667452 | 13.927324 | 3.491656e+02 | 0.7 | 38.5 | Normal | 0 |
5 rows × 42 columns
#view dimensions of dataset (rows and columns)
print ("Rows,columns in dataset:", df.shape)
Rows,columns in dataset: (22404, 42)

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['Attack_label'] = le.fit_transform(df['Attack_label'])
df['Attack_label'].value_counts()
1 13860 0 8544 Name: Attack_label, dtype: int64
# The final column in the dataset is Attack_type, and will contain one of these values:
# Display unique values in the "Attack_type" column
unique_attack_types = df['Attack_type'].unique()
print("Unique Attack Types:")
print(unique_attack_types)
Unique Attack Types: ['Recon-PortScan' 'MITM-ArpSpoofing' 'Normal' 'VulnerabilityScan' 'Mirai-udpplain' 'BrowserHijacking' 'Backdoor_Malware' 'DNS_Spoofing' 'Recon-HostDiscovery' 'Recon-OSScan' 'DictionaryBruteForce' 'SqlInjection' 'CommandInjection' 'XSS' 'Uploading_Attack' 'Recon-PingSweep']
# separate X and y variables (independent and dependent variables)
X = df.drop(['Attack_label', 'Attack_type'], axis=1)
y_label = df['Attack_label']
y_type = df['Attack_type']
X
| flow_duration | Header_Length | Protocol_Type | Duration | Rate | Srate | fin_flag_number | syn_flag_number | rst_flag_number | psh_flag_number | ... | AVG | Std | Tot size | IAT | Number | Magnitue | Radius | Covariance | Variance | Weight | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 127 | 0.000000 | 58.00 | 6.0 | 48.7 | 0.133304 | 0.133304 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 67.764062 | 36.280658 | 58.0 | 1.664290e+08 | 13.5 | 11.643890 | 51.419103 | 1.331690e+03 | 1.0 | 244.60 |
| 255 | 5.593900 | 5411305.30 | 6.0 | 60.1 | 664.243267 | 664.243267 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 377.483016 | 399.101608 | 1085.6 | 6.970882e-04 | 5.5 | 24.248639 | 564.414907 | 3.430353e+05 | 0.5 | 38.50 |
| 383 | 35.645644 | 18828.90 | 11.5 | 69.1 | 1.658343 | 1.658343 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 66.829683 | 9.909081 | 71.2 | 1.808000e-02 | 5.5 | 11.558680 | 14.013556 | 1.669243e+02 | 0.8 | 38.50 |
| 511 | 11.567025 | 660581.80 | 6.0 | 114.1 | 139.341360 | 139.341360 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 1128.159349 | 1168.350480 | 1426.7 | 1.665203e+08 | 13.5 | 47.582073 | 1653.261146 | 1.368075e+06 | 1.0 | 244.60 |
| 639 | 0.763520 | 462.30 | 5.3 | 44.8 | 6.108789 | 6.108789 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 68.184603 | 9.848105 | 78.8 | 1.668999e-02 | 5.5 | 11.667452 | 13.927324 | 3.491656e+02 | 0.7 | 38.50 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2867199 | 10.764463 | 7412625.00 | 6.0 | 116.0 | 412.050245 | 412.050245 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 1598.890495 | 1702.684726 | 3412.0 | 1.666102e+08 | 13.5 | 56.086108 | 2401.320465 | 2.886573e+06 | 1.0 | 244.60 |
| 2867327 | 1.952108 | 2061356.44 | 17.0 | 64.0 | 2446.039638 | 2446.039638 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 554.000000 | 0.000000 | 554.0 | 8.378906e+07 | 9.5 | 33.286634 | 0.000000 | 0.000000e+00 | 0.0 | 141.55 |
| 2867455 | 1.829292 | 230677.60 | 6.0 | 64.0 | 101.404379 | 101.404379 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 1672.647937 | 362.087807 | 1658.8 | 3.638983e-04 | 5.5 | 57.801193 | 512.069487 | 1.901665e+05 | 0.7 | 38.50 |
| 2867583 | 0.039189 | 83.80 | 6.0 | 90.4 | 67.493621 | 67.493621 | 0.0 | 1.0 | 0.0 | 0.0 | ... | 58.818962 | 10.878529 | 56.8 | 1.668467e+08 | 13.5 | 10.846278 | 15.419383 | 1.198071e+02 | 1.0 | 244.60 |
| 2867711 | 0.031879 | 103235.20 | 6.0 | 99.4 | 3625.050324 | 3625.050324 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 960.904921 | 713.198385 | 1054.4 | 7.369518e-05 | 5.5 | 42.654666 | 1008.614829 | 5.810233e+05 | 0.9 | 38.50 |
22404 rows × 40 columns
y_label
127 1
255 1
383 0
511 0
639 0
..
2867199 1
2867327 1
2867455 0
2867583 1
2867711 0
Name: Attack_label, Length: 22404, dtype: int64
y_type
127 Recon-PortScan
255 MITM-ArpSpoofing
383 Normal
511 Normal
639 Normal
...
2867199 MITM-ArpSpoofing
2867327 Mirai-udpplain
2867455 Normal
2867583 Recon-HostDiscovery
2867711 Normal
Name: Attack_type, Length: 22404, dtype: object
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Current Time: 2024-01-03 10:06:11 The entire notebook runtime so far is 1 minutes
X_train, X_test, y_train_label, y_test_label = train_test_split(X, y_label, test_size=0.2, random_state=42)
# Initialize RandomUnderSampler
rus = RandomUnderSampler(sampling_strategy=1, random_state=42)
# Apply Random Under Sampling
X_train_resampled, y_train_label_resampled = rus.fit_resample(X_train, y_train_label)
# If you wanted to balance the classes with SMOTE instead, sample code shown below:
## Create an instance of the SMOTE class
#smote = SMOTE(sampling_strategy='auto')
## Apply SMOTE to the training data
#X_train_resampled, y_train_type_resampled = smote.fit_resample(X_train, y_train_type)
print("Class balance before resampling")
print(y_train_label.value_counts())
print('\n')
print("Class balance after resampling")
print(y_train_label_resampled.value_counts())
Class balance before resampling 1 11098 0 6825 Name: Attack_label, dtype: int64 Class balance after resampling 0 6825 1 6825 Name: Attack_label, dtype: int64
# BUG ALERT - classes are not balanced!
# I think this is because we are using label encoding of the Attack_type feature, not the Attack_label feature
# confirm the classes are balanced
# the final column in the dataframe is named "Attack_label", and will be 0 f the data is normal,
# or 1 if the data indicates an attack.
# Figure out how many rows of each class exist in the dataframe
normal_class = (df[(df['Attack_label'] == 0)])
print("Number of rows in normal class:", (len(normal_class)) )
abnormal_class = (df[(df['Attack_label'] == 1)])
print(f"Number of rows in abnormal class:", (len(abnormal_class)) )
total_rows = len(abnormal_class) + len(normal_class)
print(f"Total Number of rows (normal+abnormal): {total_rows}" )
balance = len(abnormal_class) / total_rows * 100
balance = round(balance,2)
print(f"Percentage of abnormal class in dataset (abnormal/total*100): {balance}%")
if (balance < 10): print("This dataset is very imbalanced, please beware of overfitting.")
if (balance == 50): print("This dataset is perfectly balanced.")
Number of rows in normal class: 8544 Number of rows in abnormal class: 13860 Total Number of rows (normal+abnormal): 22404 Percentage of abnormal class in dataset (abnormal/total*100): 61.86%
is_data_scaled = "yes" #yes|no flag to turn feature scaling on|off to see if it changes prediction accuracy
if (is_data_scaled == "yes"):
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_resampled)
X_test_scaled = scaler.transform(X_test) # Only transform the test set, don't fit
# Save the values under original names so we can use consistent names in subsequent sections
X_train_resampled = X_train_scaled
X_test = X_test_scaled
else:
print(f"WARNING: dataset is not being scaled, so the results may be skewed due to data distribution!")
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Current Time: 2024-01-03 10:06:11 The entire notebook runtime so far is 1 minutes
# Create an instance of the LogisticRegression model
clf = LogisticRegression()
default_params = clf.get_params()
print(f"Training model with default hyperparameters of: {default_params}")
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_lr_undersampled_unoptimized = accuracy
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Training model with default hyperparameters of: {'C': 1.0, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 100, 'multi_class': 'auto', 'n_jobs': None, 'penalty': 'l2', 'random_state': None, 'solver': 'lbfgs', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}
Accuracy: 0.8502566391430484
Current Time: 2024-01-03 10:06:11
The entire notebook runtime so far is 1 minutes
C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py:460: ConvergenceWarning:
lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.
Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
# call previously defined function to create confusion matrix
# We want to see approximately equal results from TN and TP
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
Confusion matrix
[[1622 97]
[ 574 2188]]
True Negatives (TN) = 1622
False Positives (FP) = 97
False Negatives (FN) = 574
True Positives (TP) = 2188
Accuracy: 0.8502566391430484
Sensitivity: 0.7921795800144823
Specificity: 0.9435718440954043
Geometric Mean: 0.8645683010433519
Precision: 0.8735619978914451
Recall: 0.8502566391430484
f1-score: 0.8523026941974848
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7386 0.9436 0.8286 1719
1 0.9575 0.7922 0.8670 2762
accuracy 0.8503 4481
macro avg 0.8481 0.8679 0.8478 4481
weighted avg 0.8736 0.8503 0.8523 4481
# Create an instance of the model
clf = LogisticRegression()
# Define the hyperparameters to tune
param_grid = {
'penalty': ['l1', 'l2'],
'C': [0.001, 0.01, 0.1, 1, 10, 100],
'solver': ['liblinear', 'saga'],
'max_iter': [100, 200, 300],
'random_state': [42] #for reproducible results
}
# Create an instance of GridSearchCV
grid_search = GridSearchCV(clf, param_grid, cv=cv_count, n_jobs=-1)
# Fit the grid search to the training data
grid_search.fit(X_train_resampled, y_train_label_resampled)
# Get the best hyperparameters
best_params = grid_search.best_params_
best_scores = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Scores:", best_scores)
# Create a new instance of the model with the best hyperparameters
clf = LogisticRegression(**best_params)
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# final cross validation
cross_val_score_result = cross_val_score(clf, X_train_resampled, y_train_label_resampled, cv=cv_count)
print(f"Cross validation scores: {cross_val_score_result}")
print(f"Mean cross validation score: {cross_val_score_result.mean()}")
print(f"Standard Deviation cross validation score: {cross_val_score_result.std()}")
# Evaluate the model
accuracy = accuracy_score(y_test_label, y_pred)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_lr_undersampled_optimized = accuracy
# save best parameters for later comparison
best_params_lr = best_params
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Best Parameters: {'C': 100, 'max_iter': 100, 'penalty': 'l1', 'random_state': 42, 'solver': 'liblinear'}
Best Scores: 0.8645421245421246
Cross validation scores: [0.86520147 0.85128205 0.86666667 0.87545788 0.85274725 0.86153846
0.86373626 0.86666667 0.86886447 0.87326007]
Mean cross validation score: 0.8645421245421246
Standard Deviation cross validation score: 0.007402168219686316
Accuracy: 0.8522651193929927
Confusion matrix
[[1621 98]
[ 564 2198]]
True Negatives (TN) = 1621
False Positives (FP) = 98
False Negatives (FN) = 564
True Positives (TP) = 2198
Accuracy: 0.8522651193929927
Sensitivity: 0.7958001448225923
Specificity: 0.9429901105293775
Geometric Mean: 0.8662745907191038
Precision: 0.8746697923725928
Recall: 0.8522651193929927
f1-score: 0.8542767866231724
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7419 0.9430 0.8304 1719
1 0.9573 0.7958 0.8691 2762
accuracy 0.8523 4481
macro avg 0.8496 0.8694 0.8498 4481
weighted avg 0.8747 0.8523 0.8543 4481
Current Time: 2024-01-03 10:08:26
The entire notebook runtime so far is 4 minutes
# Create an instance of the DecisionTreeClassifier model
clf = DecisionTreeClassifier()
default_params = clf.get_params()
print(f"Training model with default hyperparameters of: {default_params}")
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_dt_undersampled_unoptimized = accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Training model with default hyperparameters of: {'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'random_state': None, 'splitter': 'best'}
Accuracy: 0.9268020531131443
Confusion matrix
[[1590 129]
[ 199 2563]]
True Negatives (TN) = 1590
False Positives (FP) = 129
False Negatives (FN) = 199
True Positives (TP) = 2563
Accuracy: 0.9268020531131443
Sensitivity: 0.9279507603186097
Specificity: 0.924956369982548
Geometric Mean: 0.926452355378757
Precision: 0.9277911380343191
Recall: 0.9268020531131443
f1-score: 0.9270626295023451
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.8888 0.9250 0.9065 1719
1 0.9521 0.9280 0.9399 2762
accuracy 0.9268 4481
macro avg 0.9204 0.9265 0.9232 4481
weighted avg 0.9278 0.9268 0.9271 4481
Current Time: 2024-01-03 10:08:27
The entire notebook runtime so far is 4 minutes
# Create an instance of the DecisionTreeClassifier model
clf = DecisionTreeClassifier()
# Define the hyperparameters to tune
param_grid = {
'criterion': ['gini', 'entropy'],
'max_depth': [None, 5, 10, 15],
'min_samples_split': [2, 5, 10],
'min_samples_leaf': [1, 2, 4],
'random_state': [42] #for reproducible results
}
# Create an instance of GridSearchCV
grid_search = GridSearchCV(clf, param_grid, cv=cv_count,n_jobs=-1)
# Fit the grid search to the training data
grid_search.fit(X_train_resampled, y_train_label_resampled)
# Get the best hyperparameters
best_params = grid_search.best_params_
best_scores = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Scores:", best_scores)
# Create a new instance of the model with the best hyperparameters
clf = DecisionTreeClassifier(**best_params)
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# final cross validation
cross_val_score_result = cross_val_score(clf, X_train_resampled, y_train_label_resampled, cv=cv_count)
print(f"Cross validation scores: {cross_val_score_result}")
print(f"Mean cross validation score: {cross_val_score_result.mean()}")
print(f"Standard Deviation cross validation score: {cross_val_score_result.std()}")
# Evaluate the model
accuracy = accuracy_score(y_test_label, y_pred)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_dt_undersampled_optimized = accuracy
# save best parameters for later comparison
best_params_dt = best_params
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Best Parameters: {'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 10, 'random_state': 42}
Best Scores: 0.9424908424908425
Cross validation scores: [0.95238095 0.93846154 0.93919414 0.95018315 0.94358974 0.94139194
0.93260073 0.94798535 0.94065934 0.93846154]
Mean cross validation score: 0.9424908424908425
Standard Deviation cross validation score: 0.00578012228500346
Accuracy: 0.9366212898906494
Confusion matrix
[[1663 56]
[ 228 2534]]
True Negatives (TN) = 1663
False Positives (FP) = 56
False Negatives (FN) = 228
True Positives (TP) = 2534
Accuracy: 0.9366212898906494
Sensitivity: 0.9174511223750905
Specificity: 0.9674229203025014
Geometric Mean: 0.9421057499256215
Precision: 0.940419396299898
Recall: 0.9366212898906494
f1-score: 0.9371127255782894
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.8794 0.9674 0.9213 1719
1 0.9784 0.9175 0.9469 2762
accuracy 0.9366 4481
macro avg 0.9289 0.9424 0.9341 4481
weighted avg 0.9404 0.9366 0.9371 4481
Current Time: 2024-01-03 10:09:05
The entire notebook runtime so far is 4 minutes
Decision Stump is a special case of the Decision Tree classifier with max_depth=1
The term "Decision Stump" typically refers to a decision tree with only one level, meaning it makes decisions based on a single feature.
The main hyperparameters for a decision stump are usually the splitting criterion and the choice of the feature to split on.
However, since decision stumps are simple, there might not be a lot of hyperparameters to optimize compared to more complex models.
# Create an instance of the DecisionTreeClassifier model
clf = DecisionTreeClassifier(max_depth=1)
default_params = clf.get_params()
print(f"Training model with default hyperparameters of: {default_params}")
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_ds_undersampled_unoptimized = accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Training model with default hyperparameters of: {'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 1, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'random_state': None, 'splitter': 'best'}
Accuracy: 0.7623298370899353
Confusion matrix
[[1684 35]
[1030 1732]]
True Negatives (TN) = 1684
False Positives (FP) = 35
False Negatives (FN) = 1030
True Positives (TP) = 1732
Accuracy: 0.7623298370899353
Sensitivity: 0.6270818247646633
Specificity: 0.9796393251890634
Geometric Mean: 0.7837818673909095
Precision: 0.8422020797347667
Recall: 0.7623298370899353
f1-score: 0.762895215122213
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.6205 0.9796 0.7598 1719
1 0.9802 0.6271 0.7648 2762
accuracy 0.7623 4481
macro avg 0.8003 0.8034 0.7623 4481
weighted avg 0.8422 0.7623 0.7629 4481
Current Time: 2024-01-03 10:09:05
The entire notebook runtime so far is 4 minutes
# check to see if there is any benefit to using Decision Stump instead of Decision Tree
if (accuracy_ds_undersampled_unoptimized < accuracy_dt_undersampled_unoptimized):
print(f"NOTE: Decision Stump is a special case of Decision Tree with max_depth=1, but does not seem to be beneficial for this dataset.")
print(f"Decision Tree accuracy is {accuracy_dt_undersampled_unoptimized*100:.2f}%, while Decision Stump accuracy is only {accuracy_ds_undersampled_unoptimized*100:.2f}%")
NOTE: Decision Stump is a special case of Decision Tree with max_depth=1, but does not seem to be beneficial for this dataset. Decision Tree accuracy is 92.68%, while Decision Stump accuracy is only 76.23%
Remember that decision stumps are very simple models, and hyperparameter tuning might not have as much impact as it would on more complex models. It's always a good practice to experiment and validate the performance on a validation set or through cross-validation.
# Create an instance of the DecisionTreeClassifier model with max_depth=1
clf = DecisionTreeClassifier(max_depth=1)
# Define the hyperparameters to tune
param_grid = {
'criterion': ['gini', 'entropy'],
'max_depth': [1],
'min_samples_split': [2, 5, 10],
'min_samples_leaf': [1, 2, 4],
'random_state': [42] #for reproducible results
}
# Create an instance of GridSearchCV
grid_search = GridSearchCV(clf, param_grid, cv=cv_count,n_jobs=-1)
# Fit the grid search to the training data
grid_search.fit(X_train_resampled, y_train_label_resampled)
# Get the best hyperparameters
best_params = grid_search.best_params_
best_scores = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Scores:", best_scores)
# Create a new instance of the model with the best hyperparameters
clf = DecisionTreeClassifier(**best_params)
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# final cross validation
cross_val_score_result = cross_val_score(clf, X_train_resampled, y_train_label_resampled, cv=cv_count)
print(f"Cross validation scores: {cross_val_score_result}")
print(f"Mean cross validation score: {cross_val_score_result.mean()}")
print(f"Standard Deviation cross validation score: {cross_val_score_result.std()}")
# Evaluate the model
accuracy = accuracy_score(y_test_label, y_pred)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_ds_undersampled_optimized = accuracy
# save best parameters for later comparison
best_params_ds = best_params
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Best Parameters: {'criterion': 'gini', 'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 42}
Best Scores: 0.8045421245421245
Cross validation scores: [0.80879121 0.78315018 0.80732601 0.82417582 0.80805861 0.81465201
0.7992674 0.81098901 0.78827839 0.8007326 ]
Mean cross validation score: 0.8045421245421245
Standard Deviation cross validation score: 0.011540728889386202
Accuracy: 0.7623298370899353
Confusion matrix
[[1684 35]
[1030 1732]]
True Negatives (TN) = 1684
False Positives (FP) = 35
False Negatives (FN) = 1030
True Positives (TP) = 1732
Accuracy: 0.7623298370899353
Sensitivity: 0.6270818247646633
Specificity: 0.9796393251890634
Geometric Mean: 0.7837818673909095
Precision: 0.8422020797347667
Recall: 0.7623298370899353
f1-score: 0.762895215122213
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.6205 0.9796 0.7598 1719
1 0.9802 0.6271 0.7648 2762
accuracy 0.7623 4481
macro avg 0.8003 0.8034 0.7623 4481
weighted avg 0.8422 0.7623 0.7629 4481
Current Time: 2024-01-03 10:09:07
The entire notebook runtime so far is 4 minutes
# check to see if there is any benefit to using Decision Stump instead of Decision Tree
if (accuracy_ds_undersampled_optimized < accuracy_dt_undersampled_optimized):
print(f"NOTE: Decision Stump is a special case of Decision Tree with max_depth=1, but does not seem to be beneficial for this dataset.")
print(f"Decision Tree accuracy is {accuracy_dt_undersampled_optimized*100:.2f}%, while Decision Stump accuracy is only {accuracy_ds_undersampled_optimized*100:.2f}%")
NOTE: Decision Stump is a special case of Decision Tree with max_depth=1, but does not seem to be beneficial for this dataset. Decision Tree accuracy is 93.66%, while Decision Stump accuracy is only 76.23%
# Create an instance of the RandomForestClassifier model
clf = RandomForestClassifier(n_jobs=-1, random_state=42)
default_params = clf.get_params()
print(f"Training model with default hyperparameters of: {default_params}")
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_rf_undersampled_unoptimized = accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Training model with default hyperparameters of: {'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': -1, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}
Accuracy: 0.9486721713903147
Confusion matrix
[[1668 51]
[ 179 2583]]
True Negatives (TN) = 1668
False Positives (FP) = 51
False Negatives (FN) = 179
True Positives (TP) = 2583
Accuracy: 0.9486721713903147
Sensitivity: 0.9351918899348298
Specificity: 0.9703315881326352
Geometric Mean: 0.9525997227425714
Precision: 0.9508874405582389
Recall: 0.9486721713903147
f1-score: 0.9489845855259785
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.9031 0.9703 0.9355 1719
1 0.9806 0.9352 0.9574 2762
accuracy 0.9487 4481
macro avg 0.9419 0.9528 0.9464 4481
weighted avg 0.9509 0.9487 0.9490 4481
Current Time: 2024-01-03 10:09:09
The entire notebook runtime so far is 4 minutes
# Create an instance of the RandomForestClassifier model
clf = RandomForestClassifier(n_jobs=-1)
# Define the hyperparameters to tune
param_grid = {
'n_estimators': [100, 200, 300],
'max_depth': [5, 10],
'random_state': [42] #for reproducible results
}
# Create an instance of GridSearchCV
grid_search = GridSearchCV(clf, param_grid, cv=cv_count, n_jobs=-1)
# Fit the grid search to the training data
grid_search.fit(X_train_resampled, y_train_label_resampled)
# Get the best hyperparameters
best_params = grid_search.best_params_
best_scores = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Scores:", best_scores)
# Create a new instance of the model with the best hyperparameters
clf = RandomForestClassifier(**best_params)
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# final cross validation
cross_val_score_result = cross_val_score(clf, X_train_resampled, y_train_label_resampled, cv=cv_count)
print(f"Cross validation scores: {cross_val_score_result}")
print(f"Mean cross validation score: {cross_val_score_result.mean()}")
print(f"Standard Deviation cross validation score: {cross_val_score_result.std()}")
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_rf_undersampled_optimized = accuracy
# save best parameters for later comparison
best_params_rf = best_params
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Best Parameters: {'max_depth': 10, 'n_estimators': 300, 'random_state': 42}
Best Scores: 0.9376556776556777
Cross validation scores: [0.94578755 0.93699634 0.94139194 0.94725275 0.93333333 0.93846154
0.92454212 0.94505495 0.92967033 0.93406593]
Mean cross validation score: 0.9376556776556777
Standard Deviation cross validation score: 0.007037925602416076
Accuracy: 0.9321580004463289
Confusion matrix
[[1683 36]
[ 268 2494]]
True Negatives (TN) = 1683
False Positives (FP) = 36
False Negatives (FN) = 268
True Positives (TP) = 2494
Accuracy: 0.9321580004463289
Sensitivity: 0.9029688631426502
Specificity: 0.9790575916230366
Geometric Mean: 0.9402438622288553
Precision: 0.9385332737039921
Recall: 0.9321580004463289
f1-score: 0.9328152368022711
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.8626 0.9791 0.9172 1719
1 0.9858 0.9030 0.9426 2762
accuracy 0.9322 4481
macro avg 0.9242 0.9410 0.9299 4481
weighted avg 0.9385 0.9322 0.9328 4481
Current Time: 2024-01-03 10:12:27
The entire notebook runtime so far is 8 minutes
# Create an instance of the model
#clf = GaussianNB() # suitable for continuous features
#clf = MultinomialNB() # used for discrete data like word counts
clf = BernoulliNB() # suitable for binary data, gives best accuracy for this dataset
default_params = clf.get_params()
print(f"Training model with default hyperparameters of: {default_params}")
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_nb_undersampled_unoptimized = accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Training model with default hyperparameters of: {'alpha': 1.0, 'binarize': 0.0, 'class_prior': None, 'fit_prior': True, 'force_alpha': 'warn'}
Accuracy: 0.7723722383396563
Confusion matrix
[[1467 252]
[ 768 1994]]
True Negatives (TN) = 1467
False Positives (FP) = 252
False Negatives (FN) = 768
True Positives (TP) = 1994
Accuracy: 0.7723722383396563
Sensitivity: 0.721940622737147
Specificity: 0.8534031413612565
Geometric Mean: 0.7849244519825987
Precision: 0.7990214556581589
Recall: 0.7723722383396563
f1-score: 0.7754982065671145
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.6564 0.8534 0.7420 1719
1 0.8878 0.7219 0.7963 2762
accuracy 0.7724 4481
macro avg 0.7721 0.7877 0.7692 4481
weighted avg 0.7990 0.7724 0.7755 4481
Current Time: 2024-01-03 10:12:27
The entire notebook runtime so far is 8 minutes
# Create an instance of the model
clf = BernoulliNB()
# Define the hyperparameters to tune
# skip the sigmoid and poly kernels, rarely used
param_grid = {'alpha': [0.1, 0.01, 0.001, 0.0001]}
# Create an instance of GridSearchCV
grid_search = GridSearchCV(clf, param_grid, cv=cv_count, n_jobs=-1)
# Fit the grid search to the training data
print("Performing GridSearchCV")
grid_search.fit(X_train_resampled, y_train_label_resampled)
# Get the best hyperparameters
best_params = grid_search.best_params_
best_scores = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Scores:", best_scores)
# Create a new instance of model with the best hyperparameters
clf = BernoulliNB(**best_params)
# Fit the model to the training data
print("Fitting the model")
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# final cross validation
cross_val_score_result = cross_val_score(clf, X_train_resampled, y_train_label_resampled, cv=cv_count)
print(f"Cross validation scores: {cross_val_score_result}")
print(f"Mean cross validation score: {cross_val_score_result.mean()}")
print(f"Standard Deviation cross validation score: {cross_val_score_result.std()}")
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_nb_undersampled_optimized = accuracy
# save best parameters for later comparison
best_params_nb = best_params
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Performing GridSearchCV
Best Parameters: {'alpha': 0.0001}
Best Scores: 0.7868864468864469
Fitting the model
Cross validation scores: [0.78241758 0.76410256 0.78827839 0.81172161 0.7970696 0.78021978
0.7978022 0.79120879 0.76996337 0.78608059]
Mean cross validation score: 0.7868864468864469
Standard Deviation cross validation score: 0.013156048769880177
Accuracy: 0.7728185672840884
Confusion matrix
[[1467 252]
[ 766 1996]]
True Negatives (TN) = 1467
False Positives (FP) = 252
False Negatives (FN) = 766
True Positives (TP) = 1996
Accuracy: 0.7728185672840884
Sensitivity: 0.722664735698769
Specificity: 0.8534031413612565
Geometric Mean: 0.7853179964806178
Precision: 0.7993085087682478
Recall: 0.7728185672840884
f1-score: 0.775938440626677
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.6570 0.8534 0.7424 1719
1 0.8879 0.7227 0.7968 2762
accuracy 0.7728 4481
macro avg 0.7724 0.7880 0.7696 4481
weighted avg 0.7993 0.7728 0.7759 4481
Current Time: 2024-01-03 10:12:28
The entire notebook runtime so far is 8 minutes
# Create an instance of the model
clf = SVC()
default_params = clf.get_params()
print(f"Training model with default hyperparameters of: {default_params}")
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_svm_undersampled_unoptimized = accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Training model with default hyperparameters of: {'C': 1.0, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 'scale', 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Accuracy: 0.8569515733095291
Confusion matrix
[[1677 42]
[ 599 2163]]
True Negatives (TN) = 1677
False Positives (FP) = 42
False Negatives (FN) = 599
True Positives (TP) = 2163
Accuracy: 0.8569515733095291
Sensitivity: 0.7831281679942071
Specificity: 0.9755671902268761
Geometric Mean: 0.8740675868819469
Precision: 0.8872979924031443
Recall: 0.8569515733095291
f1-score: 0.8589030510703584
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7368 0.9756 0.8395 1719
1 0.9810 0.7831 0.8709 2762
accuracy 0.8570 4481
macro avg 0.8589 0.8793 0.8552 4481
weighted avg 0.8873 0.8570 0.8589 4481
Current Time: 2024-01-03 10:12:36
The entire notebook runtime so far is 8 minutes
print("WARNING: SVM hyperparameter optimization is very CPU-intensive, this will take some time...")
WARNING: SVM hyperparameter optimization is very CPU-intensive, this will take some time...
# Create an instance of the model
clf = SVC()
# Define the hyperparameters to tune
# skip the sigmoid and poly kernels, rarely used
param_grid = {
'C': [0.1, 1, 10],
'kernel': ['rbf', 'linear'],
'probability': [True], #probability=True is required for VotingClassifier
'random_state': [42] #for reproducible results
}
# Create an instance of GridSearchCV
grid_search = GridSearchCV(clf, param_grid, cv=cv_count, n_jobs=-1)
# Fit the grid search to the training data
print("Performing GridSearchCV")
grid_search.fit(X_train_resampled, y_train_label_resampled)
# Get the best hyperparameters
best_params = grid_search.best_params_
best_scores = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Scores:", best_scores)
# Create a new instance of model with the best hyperparameters
clf = SVC(**best_params)
# Fit the model to the training data
print("Fitting the model")
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# final cross validation
cross_val_score_result = cross_val_score(clf, X_train_resampled, y_train_label_resampled, cv=cv_count)
print(f"Cross validation scores: {cross_val_score_result}")
print(f"Mean cross validation score: {cross_val_score_result.mean()}")
print(f"Standard Deviation cross validation score: {cross_val_score_result.std()}")
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_svm_undersampled_optimized = accuracy
# save best parameters for later comparison
best_params_svm = best_params
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Performing GridSearchCV
Best Parameters: {'C': 10, 'kernel': 'rbf', 'probability': True, 'random_state': 42}
Best Scores: 0.8905494505494506
Fitting the model
Cross validation scores: [0.9025641 0.87912088 0.89157509 0.90769231 0.88131868 0.88644689
0.88424908 0.89304029 0.88424908 0.8952381 ]
Mean cross validation score: 0.8905494505494506
Standard Deviation cross validation score: 0.00882897869314746
Accuracy: 0.8761437179201071
Confusion matrix
[[1665 54]
[ 501 2261]]
True Negatives (TN) = 1665
False Positives (FP) = 54
False Negatives (FN) = 501
True Positives (TP) = 2261
Accuracy: 0.8761437179201071
Sensitivity: 0.8186097031136857
Specificity: 0.9685863874345549
Geometric Mean: 0.8904460764458219
Precision: 0.8968902443431269
Recall: 0.8761437179201071
f1-score: 0.877816631996812
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7687 0.9686 0.8571 1719
1 0.9767 0.8186 0.8907 2762
accuracy 0.8761 4481
macro avg 0.8727 0.8936 0.8739 4481
weighted avg 0.8969 0.8761 0.8778 4481
Current Time: 2024-01-03 10:25:52
The entire notebook runtime so far is 21 minutes
# Create an instance of the model with the desired number of neighbors (you can adjust n_neighbors)
clf = KNeighborsClassifier(n_neighbors=5) # You can change the value of n_neighbors as needed
default_params = clf.get_params()
print(f"Training model with default hyperparameters of: {default_params}")
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_knn_undersampled_unoptimized = accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Training model with default hyperparameters of: {'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}
Accuracy: 0.8511492970319126
Confusion matrix
[[1597 122]
[ 545 2217]]
True Negatives (TN) = 1597
False Positives (FP) = 122
False Negatives (FN) = 545
True Positives (TP) = 2217
Accuracy: 0.8511492970319126
Sensitivity: 0.8026792179580015
Specificity: 0.9290285049447353
Geometric Mean: 0.863546104044093
Precision: 0.8702438686713598
Recall: 0.8511492970319126
f1-score: 0.8531314073473586
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7456 0.9290 0.8272 1719
1 0.9478 0.8027 0.8692 2762
accuracy 0.8511 4481
macro avg 0.8467 0.8659 0.8482 4481
weighted avg 0.8702 0.8511 0.8531 4481
Current Time: 2024-01-03 10:25:53
The entire notebook runtime so far is 21 minutes
# Create an instance of the model
clf = KNeighborsClassifier()
# Define the hyperparameters to tune
param_grid = {
'n_neighbors': [5,10,15,20,30],
'weights': ['uniform', 'distance']
}
# Create an instance of GridSearchCV
grid_search = GridSearchCV(clf, param_grid, cv=cv_count, n_jobs=-1)
# Fit the grid search to the training data
grid_search.fit(X_train_resampled, y_train_label_resampled)
# Get the best hyperparameters
best_params = grid_search.best_params_
best_scores = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Scores:", best_scores)
# Create a new instance of the model with the best hyperparameters
clf = KNeighborsClassifier(**best_params)
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# final cross validation
cross_val_score_result = cross_val_score(clf, X_train_resampled, y_train_label_resampled, cv=cv_count)
print(f"Cross validation scores: {cross_val_score_result}")
print(f"Mean cross validation score: {cross_val_score_result.mean()}")
print(f"Standard Deviation cross validation score: {cross_val_score_result.std()}")
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_knn_undersampled_optimized = accuracy
# save best parameters for later comparison
best_params_knn = best_params
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Best Parameters: {'n_neighbors': 10, 'weights': 'distance'}
Best Scores: 0.8652747252747253
Cross validation scores: [0.87472527 0.86153846 0.87545788 0.88644689 0.86007326 0.85860806
0.85641026 0.85421245 0.85054945 0.87472527]
Mean cross validation score: 0.8652747252747253
Standard Deviation cross validation score: 0.011107783048924083
Accuracy: 0.8504798036152644
Confusion matrix
[[1634 85]
[ 585 2177]]
True Negatives (TN) = 1634
False Positives (FP) = 85
False Negatives (FN) = 585
True Positives (TP) = 2177
Accuracy: 0.8504798036152644
Sensitivity: 0.7881969587255612
Specificity: 0.9505526468877254
Geometric Mean: 0.865576516193362
Precision: 0.8757035150215654
Recall: 0.8504798036152644
f1-score: 0.8525316476697681
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7364 0.9506 0.8299 1719
1 0.9624 0.7882 0.8666 2762
accuracy 0.8505 4481
macro avg 0.8494 0.8694 0.8483 4481
weighted avg 0.8757 0.8505 0.8525 4481
Current Time: 2024-01-03 10:25:57
The entire notebook runtime so far is 21 minutes
MLPClassifier is a class in scikit-learn that represents a Multi-layer Perceptron (MLP) classifier, which is a type of artificial neural network.
An MLP is a feedforward neural network that consists of multiple layers of nodes (neurons) and can learn complex patterns and relationships in data.
The MLPClassifier is specifically designed for classification tasks.
Example of all hyperparameters:
mlp_classifier = MLPClassifier(
hidden_layer_sizes=(100, 50), # Architecture of hidden layers
activation='relu', # Activation function ('relu' is common)
solver='adam', # Optimization solver
alpha=0.0001, # L2 penalty (regularization)
batch_size='auto', # Size of mini-batches ('auto' is adaptive)
learning_rate='constant', # Learning rate schedule
learning_rate_init=0.001, # Initial learning rate
max_iter=500, # Maximum number of iterations
shuffle=True, # Shuffle data in each iteration
random_state=42, # Random seed for reproducibility
verbose=True # Print progress during training
)
# Create an instance of the model
clf = MLPClassifier(random_state=42)
default_params = clf.get_params()
print(f"Training model with default hyperparameters of: {default_params}")
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_mlp_undersampled_unoptimized = accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Training model with default hyperparameters of: {'activation': 'relu', 'alpha': 0.0001, 'batch_size': 'auto', 'beta_1': 0.9, 'beta_2': 0.999, 'early_stopping': False, 'epsilon': 1e-08, 'hidden_layer_sizes': (100,), 'learning_rate': 'constant', 'learning_rate_init': 0.001, 'max_fun': 15000, 'max_iter': 200, 'momentum': 0.9, 'n_iter_no_change': 10, 'nesterovs_momentum': True, 'power_t': 0.5, 'random_state': 42, 'shuffle': True, 'solver': 'adam', 'tol': 0.0001, 'validation_fraction': 0.1, 'verbose': False, 'warm_start': False}
Accuracy: 0.8919883954474448
Confusion matrix
[[1625 94]
[ 390 2372]]
True Negatives (TN) = 1625
False Positives (FP) = 94
False Negatives (FN) = 390
True Positives (TP) = 2372
Accuracy: 0.8919883954474448
Sensitivity: 0.8587979724837075
Specificity: 0.9453170447934846
Geometric Mean: 0.9010196232174605
Precision: 0.9022555842686486
Recall: 0.8919883954474448
f1-score: 0.893211807734966
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.8065 0.9453 0.8704 1719
1 0.9619 0.8588 0.9074 2762
accuracy 0.8920 4481
macro avg 0.8842 0.9021 0.8889 4481
weighted avg 0.9023 0.8920 0.8932 4481
Current Time: 2024-01-03 10:26:09
The entire notebook runtime so far is 21 minutes
#mlp_classifier = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
# Create an instance of the model
clf = MLPClassifier()
# Define the hyperparameters to tune
param_grid = {
'hidden_layer_sizes': [(100, 50), (50, 25), (150, 100)], #tuples for hidden layers
'max_iter': [300, 500, 800],
'alpha': [0.0001, 0.001, 0.01],
'random_state': [42] #for reproducible results
}
# other exaples to use in param_grid for testing
#param_grid = {
# 'hidden_layer_sizes': [(50, 25), (100, 50), (100, 100)],
# 'activation': ['relu', 'tanh'],
# 'alpha': [0.0001, 0.001, 0.01],
# 'learning_rate': ['constant', 'adaptive'],
# 'max_iter': [200, 300, 500],
#}
# Create an instance of GridSearchCV
grid_search = GridSearchCV(clf, param_grid, cv=cv_count, n_jobs=-1)
# Fit the grid search to the training data
grid_search.fit(X_train_resampled, y_train_label_resampled)
# Get the best hyperparameters
best_params = grid_search.best_params_
best_scores = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Scores:", best_scores)
# Create a new instance of the model with the best hyperparameters
clf = MLPClassifier(**best_params)
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# final cross validation
cross_val_score_result = cross_val_score(clf, X_train_resampled, y_train_label_resampled, cv=cv_count)
print(f"Cross validation scores: {cross_val_score_result}")
print(f"Mean cross validation score: {cross_val_score_result.mean()}")
print(f"Standard Deviation cross validation score: {cross_val_score_result.std()}")
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_mlp_undersampled_optimized = accuracy
# save best parameters for later comparison
best_params_mlp = best_params
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Best Parameters: {'alpha': 0.01, 'hidden_layer_sizes': (50, 25), 'max_iter': 300, 'random_state': 42}
Best Scores: 0.8938461538461538
Cross validation scores: [0.8952381 0.88937729 0.8996337 0.91428571 0.88791209 0.89010989
0.88131868 0.8974359 0.88131868 0.9025641 ]
Mean cross validation score: 0.893919413919414
Standard Deviation cross validation score: 0.00961801812880664
Accuracy: 0.885070296808748
Confusion matrix
[[1571 148]
[ 367 2395]]
True Negatives (TN) = 1571
False Positives (FP) = 148
False Negatives (FN) = 367
True Positives (TP) = 2395
Accuracy: 0.885070296808748
Sensitivity: 0.8671252715423606
Specificity: 0.9139034322280396
Geometric Mean: 0.8902071454634782
Precision: 0.8914810444391161
Recall: 0.885070296808748
f1-score: 0.8861393351045901
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.8106 0.9139 0.8592 1719
1 0.9418 0.8671 0.9029 2762
accuracy 0.8851 4481
macro avg 0.8762 0.8905 0.8810 4481
weighted avg 0.8915 0.8851 0.8861 4481
Current Time: 2024-01-03 10:47:17
The entire notebook runtime so far is 42 minutes
XGboost and gradient boosting are both ensemble learning models Gradient boosting is built into sklearn, but xgboost needs to install its own package Let's start with gradient boosting
model = GradientBoostingClassifier( n_estimators=100, # Number of boosting stages (trees) learning_rate=0.1, # Step size shrinkage to prevent overfitting max_depth=3, # Maximum tree depth random_state=42 # Seed for reproducibility )
# Create an instance of the model
clf = GradientBoostingClassifier(random_state=42)
default_params = clf.get_params()
print(f"Training model with default hyperparameters of: {default_params}")
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_gb_undersampled_unoptimized = accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Training model with default hyperparameters of: {'ccp_alpha': 0.0, 'criterion': 'friedman_mse', 'init': None, 'learning_rate': 0.1, 'loss': 'log_loss', 'max_depth': 3, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_iter_no_change': None, 'random_state': 42, 'subsample': 1.0, 'tol': 0.0001, 'validation_fraction': 0.1, 'verbose': 0, 'warm_start': False}
Accuracy: 0.9408614148627539
Confusion matrix
[[1659 60]
[ 205 2557]]
True Negatives (TN) = 1659
False Positives (FP) = 60
False Negatives (FN) = 205
True Positives (TP) = 2557
Accuracy: 0.9408614148627539
Sensitivity: 0.9257784214337437
Specificity: 0.9650959860383944
Geometric Mean: 0.9452327959220771
Precision: 0.943678301972848
Recall: 0.9408614148627539
f1-score: 0.9412609609817393
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.8900 0.9651 0.9260 1719
1 0.9771 0.9258 0.9507 2762
accuracy 0.9409 4481
macro avg 0.9335 0.9454 0.9384 4481
weighted avg 0.9437 0.9409 0.9413 4481
Current Time: 2024-01-03 10:47:33
The entire notebook runtime so far is 43 minutes
# Create an instance of the model
clf = GradientBoostingClassifier()
default_params = clf.get_params()
print(f"Training model with default hyperparameters of: {default_params}")
# Define the hyperparameters to tune
param_grid = {
'n_estimators': [10, 100, 300],
'learning_rate': [0.01, 0.1, 0.2],
'max_depth': [3, 5, 10],
'random_state': [42] #for reproducible results
}
# Create an instance of GridSearchCV
grid_search = GridSearchCV(clf, param_grid, cv=cv_count, n_jobs=-1)
# Fit the grid search to the training data
grid_search.fit(X_train_resampled, y_train_label_resampled)
# Get the best hyperparameters
best_params = grid_search.best_params_
best_scores = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Scores:", best_scores)
# Create a new instance of the model with the best hyperparameters
clf = GradientBoostingClassifier(**best_params)
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict the labels for the test data
y_pred = clf.predict(X_test)
# final cross validation
cross_val_score_result = cross_val_score(clf, X_train_resampled, y_train_label_resampled, cv=cv_count)
print(f"Cross validation scores: {cross_val_score_result}")
print(f"Mean cross validation score: {cross_val_score_result.mean()}")
print(f"Standard Deviation cross validation score: {cross_val_score_result.std()}")
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_gb_undersampled_optimized = accuracy
# save best parameters for later comparison
best_params_gb = best_params
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Training model with default hyperparameters of: {'ccp_alpha': 0.0, 'criterion': 'friedman_mse', 'init': None, 'learning_rate': 0.1, 'loss': 'log_loss', 'max_depth': 3, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_iter_no_change': None, 'random_state': None, 'subsample': 1.0, 'tol': 0.0001, 'validation_fraction': 0.1, 'verbose': 0, 'warm_start': False}
Best Parameters: {'learning_rate': 0.1, 'max_depth': 10, 'n_estimators': 300, 'random_state': 42}
Best Scores: 0.9522344322344323
Cross validation scores: [0.96117216 0.94725275 0.95604396 0.95604396 0.94945055 0.96190476
0.94871795 0.94871795 0.94871795 0.94432234]
Mean cross validation score: 0.9522344322344323
Standard Deviation cross validation score: 0.005785226966270984
Accuracy: 0.951573309529123
Confusion matrix
[[1678 41]
[ 176 2586]]
True Negatives (TN) = 1678
False Positives (FP) = 41
False Negatives (FN) = 176
True Positives (TP) = 2586
Accuracy: 0.951573309529123
Sensitivity: 0.9362780593772628
Specificity: 0.9761489237929029
Geometric Mean: 0.956005658995815
Precision: 0.9539630814236849
Recall: 0.951573309529123
f1-score: 0.9518816022048988
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.9051 0.9761 0.9393 1719
1 0.9844 0.9363 0.9597 2762
accuracy 0.9516 4481
macro avg 0.9447 0.9562 0.9495 4481
weighted avg 0.9540 0.9516 0.9519 4481
Current Time: 2024-01-03 11:41:36
The entire notebook runtime so far is 97 minutes
XGBoost (eXtreme Gradient Boosting) is a popular and powerful open-source machine learning library designed for speed and performance.
It is an implementation of gradient boosting, a machine learning technique that builds a series of weak learners (typically decision trees) and combines their predictions to create a stronger, more accurate model.
XGBoost is known for its efficiency, scalability, and ability to handle diverse types of data.
XGBoost is not built into sklearn, you will need to install the package with: pip install xgboost
In this example, the xgb.DMatrix is a data structure that XGBoost uses for efficient training. The params dictionary contains various hyperparameters for the XGBoost model, and xgb.train is used to train the model. Finally, predictions are made on the test set, and the accuracy is evaluated.
# The xgboost library is not part of the default install of sklearn, check to see if xgboost library is installed
if 'xgboost' in sys.modules:
print(f"Confirmed xgboost library is installed")
else:
print(f"ERROR: xgboost library is NOT installed, please install with: pip install xgboost")
# only run the rest of the cell if the xgboost library is installed
if 'xgboost' in sys.modules:
# Convert data to DMatrix format (optimized data structure for XGBoost)
dtrain = xgb.DMatrix(X_train_resampled, label=y_train_label_resampled)
dtest = xgb.DMatrix(X_test, label=y_test_label)
# Set parameters for XGBoost
params = {
'objective': 'multi:softmax', # Multi-class classification
'num_class': 3, # Number of classes
'max_depth': 3,
'eta': 0.1,
'eval_metric': 'merror' # Mean classification error
}
# Train the XGBoost model
num_rounds = 100
xgb_model = xgb.train(params, dtrain, num_rounds)
# Make predictions on the test set
y_pred = xgb_model.predict(dtest)
# Convert predicted probabilities to class labels
y_pred = [int(round(pred)) for pred in y_pred]
# Evaluate the accuracy
accuracy = accuracy_score(y_test_label, y_pred)
print(f"Accuracy: {accuracy}")
accuracy = clf.score(X_test, y_test_label)
print(f"Accuracy: {accuracy}")
# save accuracy for later comparison
accuracy_xgb_undersampled_unoptimized = accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Confirmed xgboost library is installed Accuracy: 0.9399687569738897 Accuracy: 0.951573309529123
Confusion matrix
[[1670 49]
[ 220 2542]]
True Negatives (TN) = 1670
False Positives (FP) = 49
False Negatives (FN) = 220
True Positives (TP) = 2542
Accuracy: 0.9399687569738897
Sensitivity: 0.9203475742215785
Specificity: 0.9714950552646888
Geometric Mean: 0.945575548267358
Precision: 0.9436891040782112
Recall: 0.9399687569738897
f1-score: 0.9404321033876315
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.8836 0.9715 0.9255 1719
1 0.9811 0.9203 0.9497 2762
accuracy 0.9400 4481
macro avg 0.9323 0.9459 0.9376 4481
weighted avg 0.9437 0.9400 0.9404 4481
Current Time: 2024-01-03 11:41:36
The entire notebook runtime so far is 97 minutes
# The xgboost library is not part of the default install of sklearn, check to see if xgboost library is installed
if 'xgboost' in sys.modules:
print(f"Confirmed xgboost library is installed")
else:
print(f"ERROR: xgboost library is NOT installed, please install with: pip install xgboost")
# only run the rest of the cell if the xgboost library is installed
if 'xgboost' in sys.modules:
# Create an instance of the model
clf = xgb.XGBClassifier()
default_params = clf.get_params()
print(f"Default hyperparameters are: {default_params}")
print('\n')
# Define the hyperparameters to tune
param_grid = {
'objective': ['multi:softmax'],
'num_class': [3], # Number of classes
'max_depth': [3, 5, 7],
'learning_rate': [0.1, 0.01, 0.001],
'subsample': [0.8, 1.0],
'colsample_bytree': [0.8, 1.0],
'n_estimators': [50, 100, 200],
'random_state': [42] #for reproducible results
}
print(f"Adjusting hyperparameters to: {param_grid}")
print('\n')
# Use GridSearchCV to find the best hyperparameters
print(f"Performing GridSearchCV")
grid_search = GridSearchCV(clf, param_grid, cv=cv_count, scoring='accuracy')
print(f"Fitting model")
grid_search.fit(X_train_resampled, y_train_label_resampled)
# Print the best hyperparameters
best_params = grid_search.best_params_
best_scores = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Scores:", best_scores)
# Evaluate the model with the best hyperparameters on the test set
clf = grid_search.best_estimator_
y_pred = clf.predict(X_test)
# final cross validation
cross_val_score_result = cross_val_score(clf, X_train_resampled, y_train_label_resampled, cv=cv_count)
print(f"Cross validation scores: {cross_val_score_result}")
print(f"Mean cross validation score: {cross_val_score_result.mean()}")
print(f"Standard Deviation cross validation score: {cross_val_score_result.std()}")
# Evaluate the accuracy
accuracy = accuracy_score(y_test_label, y_pred)
print(f"Accuracy: {accuracy}")
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_xgb_undersampled_optimized = accuracy
# save best parameters for later comparison
best_params_xgb = best_params
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Confirmed xgboost library is installed
Default hyperparameters are: {'objective': 'binary:logistic', 'base_score': None, 'booster': None, 'callbacks': None, 'colsample_bylevel': None, 'colsample_bynode': None, 'colsample_bytree': None, 'device': None, 'early_stopping_rounds': None, 'enable_categorical': False, 'eval_metric': None, 'feature_types': None, 'gamma': None, 'grow_policy': None, 'importance_type': None, 'interaction_constraints': None, 'learning_rate': None, 'max_bin': None, 'max_cat_threshold': None, 'max_cat_to_onehot': None, 'max_delta_step': None, 'max_depth': None, 'max_leaves': None, 'min_child_weight': None, 'missing': nan, 'monotone_constraints': None, 'multi_strategy': None, 'n_estimators': None, 'n_jobs': None, 'num_parallel_tree': None, 'random_state': None, 'reg_alpha': None, 'reg_lambda': None, 'sampling_method': None, 'scale_pos_weight': None, 'subsample': None, 'tree_method': None, 'validate_parameters': None, 'verbosity': None}
Adjusting hyperparameters to: {'objective': ['multi:softmax'], 'num_class': [3], 'max_depth': [3, 5, 7], 'learning_rate': [0.1, 0.01, 0.001], 'subsample': [0.8, 1.0], 'colsample_bytree': [0.8, 1.0], 'n_estimators': [50, 100, 200], 'random_state': [42]}
Performing GridSearchCV
Fitting model
Best Parameters: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 200, 'num_class': 3, 'objective': 'multi:softmax', 'random_state': 42, 'subsample': 0.8}
Best Scores: 0.9535531135531136
Cross validation scores: [0.96263736 0.94945055 0.95531136 0.95897436 0.95384615 0.96630037
0.94505495 0.95384615 0.94358974 0.94652015]
Mean cross validation score: 0.9535531135531136
Standard Deviation cross validation score: 0.007179487179487199
Accuracy: 0.9493416648069627
Accuracy: 0.9493416648069627
Confusion matrix
[[1672 47]
[ 180 2582]]
True Negatives (TN) = 1672
False Positives (FP) = 47
False Negatives (FN) = 180
True Positives (TP) = 2582
Accuracy: 0.9493416648069627
Sensitivity: 0.9348298334540188
Specificity: 0.9726585223967423
Geometric Mean: 0.9535566079157434
Precision: 0.9516957946259265
Recall: 0.9493416648069627
f1-score: 0.949660146713598
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.9028 0.9727 0.9364 1719
1 0.9821 0.9348 0.9579 2762
accuracy 0.9493 4481
macro avg 0.9425 0.9537 0.9472 4481
weighted avg 0.9517 0.9493 0.9497 4481
Current Time: 2024-01-03 11:50:02
The entire notebook runtime so far is 105 minutes
# this section compares the accuracy of different methods:
if (is_data_scaled == "yes"):
print(f"NOTE: This dataset has been scaled to avoid skewing the results due to large data distribution")
if (is_data_scaled == "no"):
print(f"NOTE: This dataset has NOT been scaled, so the results may be inaccurate!")
print('\n')
print(f"LR accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_lr_undersampled_unoptimized*100:.2f}%")
print(f"LR accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_lr_undersampled_optimized*100:.2f}%")
print('\n')
print(f"DT accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_dt_undersampled_unoptimized*100:.2f}%")
print(f"DT accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_dt_undersampled_optimized*100:.2f}%")
print('\n')
print(f"DS accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_ds_undersampled_unoptimized*100:.2f}%")
print(f"DS accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_ds_undersampled_optimized*100:.2f}%")
print('\n')
print(f"RF accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_rf_undersampled_unoptimized*100:.2f}%")
print(f"RF accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_rf_undersampled_optimized*100:.2f}%")
print('\n')
print(f"NB accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_nb_undersampled_unoptimized*100:.2f}%")
print(f"NB accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_nb_undersampled_optimized*100:.2f}%")
print('\n')
print(f"SVM accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_svm_undersampled_unoptimized*100:.2f}%")
print(f"SVM accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_svm_undersampled_optimized*100:.2f}%")
print('\n')
print(f"KNN accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_knn_undersampled_unoptimized*100:.2f}%")
print(f"KNN accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_knn_undersampled_optimized*100:.2f}%")
print('\n')
print(f"MLP accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_mlp_undersampled_unoptimized*100:.2f}%")
print(f"MLP accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_mlp_undersampled_optimized*100:.2f}%")
print('\n')
print(f"GB accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_gb_undersampled_unoptimized*100:.2f}%")
print(f"GB accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_gb_undersampled_optimized*100:.2f}%")
print('\n')
print(f"XGB accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_xgb_undersampled_unoptimized*100:.2f}%")
print(f"XGB accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_xgb_undersampled_optimized*100:.2f}%")
print('\n')
NOTE: This dataset has been scaled to avoid skewing the results due to large data distribution LR accuracy on undersampled balanced data, before hyperparameter optimimization: 85.03% LR accuracy on undersampled balanced data, after hyperparameter optimimization: 85.23% DT accuracy on undersampled balanced data, before hyperparameter optimimization: 92.68% DT accuracy on undersampled balanced data, after hyperparameter optimimization: 93.66% DS accuracy on undersampled balanced data, before hyperparameter optimimization: 76.23% DS accuracy on undersampled balanced data, after hyperparameter optimimization: 76.23% RF accuracy on undersampled balanced data, before hyperparameter optimimization: 94.87% RF accuracy on undersampled balanced data, after hyperparameter optimimization: 93.22% NB accuracy on undersampled balanced data, before hyperparameter optimimization: 77.24% NB accuracy on undersampled balanced data, after hyperparameter optimimization: 77.28% SVM accuracy on undersampled balanced data, before hyperparameter optimimization: 85.70% SVM accuracy on undersampled balanced data, after hyperparameter optimimization: 87.61% KNN accuracy on undersampled balanced data, before hyperparameter optimimization: 85.11% KNN accuracy on undersampled balanced data, after hyperparameter optimimization: 85.05% MLP accuracy on undersampled balanced data, before hyperparameter optimimization: 89.20% MLP accuracy on undersampled balanced data, after hyperparameter optimimization: 88.51% GB accuracy on undersampled balanced data, before hyperparameter optimimization: 94.09% GB accuracy on undersampled balanced data, after hyperparameter optimimization: 95.16% XGB accuracy on undersampled balanced data, before hyperparameter optimimization: 95.16% XGB accuracy on undersampled balanced data, after hyperparameter optimimization: 94.93%
This section takes the individual ML algorithms tested earlier, then runs them through an ensemble model The goal is to see if ensemble learning can give us higher accuracy
Voting Classifier: 2 methods: hard voting (majority vote), and soft voting (takes the average of predictive probabilities, takes the class with the highest average probability)
Stacking Classifier: Generates a final model based on multiple base models. Predictions in intermediate steps are used to generate meta-models.
Boosting Classifer: Trains weak model, generate new model on poorly performing instances, tweak the weights to get better accuracy. The AdaBoostClassifier is an ensemble learning algorithm that belongs to the family of boosting methods. It is specifically designed for binary classification problems but can be extended to multi-class classification. AdaBoost stands for Adaptive Boosting, and its primary goal is to combine the predictions from multiple weak classifiers to create a strong classifier.
Bagging Classifier: Bagging (Bootstrap Aggregating) is an ensemble learning technique that aims to improve the stability and accuracy of machine learning models. It involves training multiple instances of the same base model on different subsets of the training data. The predictions from individual models are then combined, often by averaging or voting, to produce the final prediction. BaggingClassifier is a powerful ensemble technique that is particularly effective when applied to base models with high variance. It offers improved generalization, stability, and robustness, but it may not be the optimal choice for all scenarios, and its effectiveness depends on the characteristics of the base model and the dataset.
Comparison Table
| Method | Combines Models | Strengths | Weaknesses |
|---|---|---|---|
| Voting | Yes | Simple, effective for balancing out model weaknesses. | Not as sophisticated as other methods. |
| Stacking | Yes | Can leverage the strengths of a combination of models. | Risk of overfitting. |
| Boosting | No | Can turn a weak model into a strong one. | Sensitive to noisy data and outliers. |
| Bagging | No | Minimizes overfitting with data with high variance | Depends on base model performance |
print(f"Best parameters for LR: {best_params_lr}")
print(f"Best parameters for DT: {best_params_dt}")
print(f"Best parameters for DS: {best_params_ds}")
print(f"Best parameters for RF: {best_params_rf}")
print(f"Best parameters for NB: {best_params_nb}")
print(f"Best parameters for SVM: {best_params_svm}")
print(f"Best parameters for KNN: {best_params_knn}")
print(f"Best parameters for MLP: {best_params_mlp}")
print(f"Best parameters for GB: {best_params_gb}")
print(f"Best parameters for XGB: {best_params_xgb}")
Best parameters for LR: {'C': 100, 'max_iter': 100, 'penalty': 'l1', 'random_state': 42, 'solver': 'liblinear'}
Best parameters for DT: {'criterion': 'entropy', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 10, 'random_state': 42}
Best parameters for DS: {'criterion': 'gini', 'max_depth': 1, 'min_samples_leaf': 1, 'min_samples_split': 2, 'random_state': 42}
Best parameters for RF: {'max_depth': 10, 'n_estimators': 300, 'random_state': 42}
Best parameters for NB: {'alpha': 0.0001}
Best parameters for SVM: {'C': 10, 'kernel': 'rbf', 'probability': True, 'random_state': 42}
Best parameters for KNN: {'n_neighbors': 10, 'weights': 'distance'}
Best parameters for MLP: {'alpha': 0.01, 'hidden_layer_sizes': (50, 25), 'max_iter': 300, 'random_state': 42}
Best parameters for GB: {'learning_rate': 0.1, 'max_depth': 10, 'n_estimators': 300, 'random_state': 42}
Best parameters for XGB: {'colsample_bytree': 0.8, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 200, 'num_class': 3, 'objective': 'multi:softmax', 'random_state': 42, 'subsample': 0.8}
# In the previous cell, we have optimized hyperparameters for each of the base classifiers saved in python dictionaries.
# Now we will use the ** unpacking syntax to pass the key-value pairs from the dictionaries as keyword arguments to each classifier constructor.
# This way, the hyperparameters specified in each dictionary are correctly applied when creating each individual classifier.
# Define individual classifiers using hyperparameters calculated earlier
lr_clf = LogisticRegression(**best_params_lr)
dt_clf = DecisionTreeClassifier(**best_params_dt)
ds_clf = DecisionTreeClassifier(**best_params_ds)
rf_clf = RandomForestClassifier(**best_params_rf)
nb_clf = BernoulliNB(**best_params_nb)
svm_clf = SVC(**best_params_svm) #need probability=True for voting classifier, already set in hyperparameter optimization section
knn_clf = KNeighborsClassifier(**best_params_knn)
mlp_clf = MLPClassifier(**best_params_mlp)
gb_clf = GradientBoostingClassifier(**best_params_gb)
xgb_clf = xgb.XGBClassifier(**best_params_xgb)
print(f"Best parameters for LR: {lr_clf}")
print(f"Best parameters for DT: {dt_clf}")
print(f"Best parameters for DS: {ds_clf}")
print(f"Best parameters for RF: {rf_clf}")
print(f"Best parameters for NB: {nb_clf}")
print(f"Best parameters for SVM: {svm_clf}")
print(f"Best parameters for KNN: {knn_clf}")
print(f"Best parameters for MLP: {mlp_clf}")
print(f"Best parameters for GB: {gb_clf}")
print(f"Best parameters for XGB: {xgb_clf}")
Best parameters for LR: LogisticRegression(C=100, penalty='l1', random_state=42, solver='liblinear')
Best parameters for DT: DecisionTreeClassifier(criterion='entropy', max_depth=10, min_samples_split=10,
random_state=42)
Best parameters for DS: DecisionTreeClassifier(max_depth=1, random_state=42)
Best parameters for RF: RandomForestClassifier(max_depth=10, n_estimators=300, random_state=42)
Best parameters for NB: BernoulliNB(alpha=0.0001)
Best parameters for SVM: SVC(C=10, probability=True, random_state=42)
Best parameters for KNN: KNeighborsClassifier(n_neighbors=10, weights='distance')
Best parameters for MLP: MLPClassifier(alpha=0.01, hidden_layer_sizes=(50, 25), max_iter=300,
random_state=42)
Best parameters for GB: GradientBoostingClassifier(max_depth=10, n_estimators=300, random_state=42)
Best parameters for XGB: XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=0.8, device=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=0.1, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=5, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
multi_strategy=None, n_estimators=200, n_jobs=None, num_class=3,
num_parallel_tree=None, ...)
In this example:
SVC, KNeighborsClassifier, and RandomForestClassifier are individual classifiers.
A VotingClassifier is created with these classifiers and a soft voting strategy. Soft voting predicts the class label based on the argmax of the sums of the predicted probabilities.
The ensemble model is trained on the training set.
Predictions are made on the test set, and the performance of the ensemble model is evaluated.
You can adjust the parameters of the individual classifiers and the VotingClassifier based on your specific needs. Note that not all classifiers support probability estimates (probability=True), so make sure to check the documentation for each classifier.
Ensemble methods like VotingClassifier are beneficial when combining diverse models that capture different aspects of the data, leading to a more robust and accurate overall model.
# Try the voting classifier with all the base models
# Create a VotingClassifier with 'soft' voting (uses predicted probabilities)
clf = VotingClassifier(
estimators=[('lr', lr_clf), ('dt', dt_clf), ('rf', rf_clf), ('nb', nb_clf), ('svm', svm_clf), ('knn', knn_clf), ('mlp', mlp_clf), ('gb', gb_clf)],
voting='soft' # 'hard' for majority voting, 'soft' for weighted voting based on probabilities
)
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Make predictions on the test set
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_ensemble_voting = accuracy
Accuracy: 0.9185449676411516
# Try the voting classifier with stronger learners to see if you get better accuracy
# Create a VotingClassifier with 'soft' voting (uses predicted probabilities)
clf = VotingClassifier(
estimators=[('svm', svm_clf), ('rf', rf_clf), ('dt', dt_clf)],
voting='soft' # 'hard' for majority voting, 'soft' for weighted voting based on probabilities
)
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Make predictions on the test set
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_ensemble_voting = accuracy
Accuracy: 0.9417540727516179
# Try the voting classifier with the weakest base models
# Create a VotingClassifier with 'soft' voting (uses predicted probabilities)
clf = VotingClassifier(
estimators=[('lr', lr_clf), ('nb', nb_clf), ('svm', svm_clf), ('knn', knn_clf), ('mlp', mlp_clf)],
voting='soft' # 'hard' for majority voting, 'soft' for weighted voting based on probabilities
)
# , ('mlp', mlp_clf), ('gb', gb_clf), ('xgb', xgb_clf)
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Make predictions on the test set
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_ensemble_voting = accuracy
Accuracy: 0.8727962508368667
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Confusion matrix
[[1655 64]
[ 506 2256]]
True Negatives (TN) = 1655
False Positives (FP) = 64
False Negatives (FN) = 506
True Positives (TP) = 2256
Accuracy: 0.8727962508368667
Sensitivity: 0.8167994207096307
Specificity: 0.9627690517742874
Geometric Mean: 0.8867858838335206
Precision: 0.8931715186211467
Recall: 0.8727962508368667
f1-score: 0.8745099329621357
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7658 0.9628 0.8531 1719
1 0.9724 0.8168 0.8878 2762
accuracy 0.8728 4481
macro avg 0.8691 0.8898 0.8705 4481
weighted avg 0.8932 0.8728 0.8745 4481
Current Time: 2024-01-03 11:54:43
The entire notebook runtime so far is 110 minutes
# Create the VotingClassifier
clf = VotingClassifier(estimators=[('lr', lr_clf), ('svm', svm_clf), ('nb', nb_clf), ('knn', knn_clf)], voting='hard')
# Define the hyperparameters to tune
param_grid = {
'lr__C': [0.1, 1, 10], # LogisticRegression hyperparameter
'svm__C': [0.1, 1, 10], # SVC hyperparameter
'knn__n_neighbors': [5, 10, 30], # KNN hyperparameter
'nb__alpha': [0.1, 0.01, 0.001, 0.0001] # NB hyperparameter
}
# Use GridSearchCV for hyperparameter tuning
print(f"Performing GridSearchCV")
grid_search = GridSearchCV(clf, param_grid, cv=cv_count, scoring='accuracy')
print(f"Fitting model")
grid_search.fit(X_train_resampled, y_train_label_resampled)
# Validate on Test Set
clf = grid_search.best_estimator_
print(f"Found best_estimator_ {clf}")
y_pred = clf.predict(X_test)
# final cross validation
cross_val_score_result = cross_val_score(clf, X_train_resampled, y_train_label_resampled, cv=cv_count)
print(f"Cross validation scores: {cross_val_score_result}")
print(f"Mean cross validation score: {cross_val_score_result.mean()}")
print(f"Standard Deviation cross validation score: {cross_val_score_result.std()}")
# Evaluate performance on the test set
accuracy = accuracy_score(y_test_label, y_pred)
print(f"Final Accuracy on Test Set: {accuracy}")
# save accuracy for later comparison
accuracy_ensemble_voting = accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Performing GridSearchCV
Fitting model
Found best_estimator_ VotingClassifier(estimators=[('lr',
LogisticRegression(C=10, penalty='l1',
random_state=42,
solver='liblinear')),
('svm',
SVC(C=10, probability=True, random_state=42)),
('nb', BernoulliNB(alpha=0.1)),
('knn', KNeighborsClassifier(weights='distance'))])
Cross validation scores: [0.87399267 0.85567766 0.86300366 0.87838828 0.85714286 0.86813187
0.85787546 0.86300366 0.85641026 0.87545788]
Mean cross validation score: 0.8649084249084249
Standard Deviation cross validation score: 0.008126256189809274
Final Accuracy on Test Set: 0.8444543628654319
Confusion matrix
[[1689 30]
[ 667 2095]]
True Negatives (TN) = 1689
False Positives (FP) = 30
False Negatives (FN) = 667
True Positives (TP) = 2095
Accuracy: 0.8444543628654319
Sensitivity: 0.7585083272990587
Specificity: 0.9825479930191972
Geometric Mean: 0.8632907011407215
Precision: 0.8826927455811573
Recall: 0.8444543628654319
f1-score: 0.8464743720681127
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7169 0.9825 0.8290 1719
1 0.9859 0.7585 0.8574 2762
accuracy 0.8445 4481
macro avg 0.8514 0.8705 0.8432 4481
weighted avg 0.8827 0.8445 0.8465 4481
Current Time: 2024-01-03 18:15:49
The entire notebook runtime so far is 491 minutes
This model (StackingClassifier) uses multiple base estimators such as LR, NB, SVC, KNN, etc.
A StackingClassifier is created with these multiple base classifiers classifiers and a meta-classifier (LogisticRegression) as the final estimator.
The stacking ensemble model is trained on the training set.
Predictions are made on the test set, and the performance of the stacking ensemble model is evaluated.
You can customize the base estimators, the final estimator, and other parameters of the StackingClassifier based on your specific needs.
# Try all the base estimators with the default final_estimator
# Create a stacking ensemble model with a logistic regression meta-classifier
clf = StackingClassifier(
estimators=[('lr', lr_clf), ('dt', dt_clf), ('rf', rf_clf), ('nb', nb_clf), ('svm', svm_clf), ('knn', knn_clf), ('mlp', mlp_clf), ('gb', gb_clf)],
final_estimator=LogisticRegression()
)
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Make predictions on the test set
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_ensemble_stacking = accuracy
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Accuracy: 0.9500111582236108 Current Time: 2024-01-03 18:31:53 The entire notebook runtime so far is 507 minutes
# Try only the strongest base classifiers in the stacking classifier, with the default final_estimator
clf = StackingClassifier(
estimators=[('dt', dt_clf), ('rf', rf_clf), ('gb', gb_clf)],
final_estimator=LogisticRegression()
)
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Make predictions on the test set
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_ensemble_stacking = accuracy
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Accuracy: 0.9493416648069627 Current Time: 2024-01-03 18:45:10 The entire notebook runtime so far is 520 minutes
# Try only the weakest base models with the default final_estimator
# Create a stacking ensemble model with a logistic regression meta-classifier
clf = StackingClassifier(
estimators=[('lr', lr_clf), ('nb', nb_clf), ('svm', svm_clf), ('knn', knn_clf), ('mlp', mlp_clf)],
final_estimator=LogisticRegression()
)
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Make predictions on the test set
y_pred = clf.predict(X_test)
# Evaluate the model
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# save accuracy for later comparison
accuracy_ensemble_stacking = accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Accuracy: 0.893550546752957
Confusion matrix
[[1613 106]
[ 371 2391]]
True Negatives (TN) = 1613
False Positives (FP) = 106
False Negatives (FN) = 371
True Positives (TP) = 2391
Accuracy: 0.893550546752957
Sensitivity: 0.8656770456191166
Specificity: 0.9383362420011635
Geometric Mean: 0.9012747337925943
Precision: 0.9020987351154506
Recall: 0.893550546752957
f1-score: 0.8946775158912602
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.8130 0.9383 0.8712 1719
1 0.9575 0.8657 0.9093 2762
accuracy 0.8936 4481
macro avg 0.8853 0.9020 0.8902 4481
weighted avg 0.9021 0.8936 0.8947 4481
Current Time: 2024-01-03 18:47:57
The entire notebook runtime so far is 523 minutes
# start with multiple weak base_estimators, then create a for loop to test each final_estimator, keeping track of the best final_estimator
estimator_type = "weak" #strong|weak flag to determine which base estimators to use
strong_base_estimators = [('rf', rf_clf), ('gb', gb_clf), ('dt', dt_clf)]
weak_base_estimators = [('lr', lr_clf), ('nb', nb_clf), ('svm', svm_clf), ('knn', knn_clf), ('mlp', mlp_clf)]
final_estimators = ['RandomForestClassifier', 'DecisionTreeClassifier', 'GradientBoostingClassifier','LogisticRegression','BernoulliNB', 'SVC', 'KNN', 'MLPClassifier']
final_estimators = ['BernoulliNB', 'SVC', 'KNN', 'MLPClassifier', 'LogisticRegression']
if (estimator_type == "strong"): base_estimators = strong_base_estimators
if (estimator_type == "weak"): base_estimators = weak_base_estimators
best_final_estimator_name = "none"
best_final_estimator_accuracy = 0 #initialize value to keep track of the accuracy level of each final classifier
for my_final_estimator in final_estimators:
print('\n')
print(f"Testing hyperparameter optimization with {estimator_type} base estimators {base_estimators} and final_estimator={my_final_estimator}")
if (my_final_estimator == 'RandomForestClassifier'):
ensemble = StackingClassifier(estimators=base_estimators, final_estimator=RandomForestClassifier())
ensemble_params = {'final_estimator__n_estimators': [50, 100, 200], 'final_estimator__max_depth': [None, 5, 10, 15]} #tunable hyperparameters for final_estimator
if (my_final_estimator == 'DecisionTreeClassifier'):
ensemble = StackingClassifier(estimators=base_estimators, final_estimator=DecisionTreeClassifier())
ensemble_params = {'final_estimator__max_depth': [None, 5, 10, 15]} #tunable hyperparameters for final_estimator
if (my_final_estimator == 'GradientBoostingClassifier'):
ensemble = StackingClassifier(estimators=base_estimators, final_estimator=GradientBoostingClassifier())
ensemble_params = {'final_estimator__n_estimators': [10, 100, 300], 'final_estimator__learning_rate': [0.1, 0.01, 0.2], 'final_estimator__max_depth': [3,5,10]} #tunable hyperparameters for final_estimator
if (my_final_estimator == 'LogisticRegression'):
ensemble = StackingClassifier(estimators=base_estimators, final_estimator=LogisticRegression())
ensemble_params = {'final_estimator__C': [1, 10, 100], 'final_estimator__max_iter': [100, 200, 300]} #tunable hyperparameters for final_estimator
if (my_final_estimator == 'BernoulliNB'):
ensemble = StackingClassifier(estimators=base_estimators, final_estimator=BernoulliNB())
ensemble_params = {'final_estimator__alpha': [0.1, 0.001]} #tunable hyperparameters for final_estimator
if (my_final_estimator == 'SVC'):
ensemble = StackingClassifier(estimators=base_estimators, final_estimator=SVC())
ensemble_params = {'final_estimator__C': [1, 10]} #tunable hyperparameters for final_estimator
if (my_final_estimator == 'KNN'):
ensemble = StackingClassifier(estimators=base_estimators, final_estimator=KNeighborsClassifier())
ensemble_params = {'final_estimator__n_neighbors': [10,30]} #tunable hyperparameters for final_estimator
if (my_final_estimator == 'MLPClassifier'):
ensemble = StackingClassifier(estimators=base_estimators, final_estimator=MLPClassifier())
ensemble_params = {'final_estimator__hidden_layer_sizes': [(100, 50), (50, 25), (150, 100)], 'final_estimator__max_iter': [500, 800], 'final_estimator__alpha': [0.001, 0.01]} #tunable hyperparameters for final_estimator
print(f"Performing GridSearchCV for final_estimator={my_final_estimator}")
ensemble_grid = GridSearchCV(ensemble, ensemble_params, cv=cv_count, scoring='accuracy')
print(f"Fitting model")
ensemble_grid.fit(X_train_resampled, y_train_label_resampled)
# Validate on Test Set
clf = ensemble_grid.best_estimator_
print(f"Found best_estimator_ {clf}")
y_pred = clf.predict(X_test)
# final cross validation
cross_val_score_result = cross_val_score(clf, X_train_resampled, y_train_label_resampled, cv=cv_count)
print(f"Cross validation scores: {cross_val_score_result}")
print(f"Mean cross validation score: {cross_val_score_result.mean()}")
print(f"Standard Deviation cross validation score: {cross_val_score_result.std()}")
# Evaluate performance on the test set
accuracy = accuracy_score(y_test_label, y_pred)
print(f"Final Accuracy on Test Set: {accuracy}")
# of all the final_estimators, check to see if this final_estimator provides the best accuracy
if (accuracy > best_final_estimator_accuracy):
best_final_estimator_name = my_final_estimator #save the name of the final_estimator that is currently the best
best_final_estimator_accuracy = accuracy #save the accuracy of the final estimator that is currently the best
print(f"The best final_estimator so far is {best_final_estimator_name}, with accuracy of {best_final_estimator_accuracy}")
else:
print(f"This is not the best base classifier")
# save accuracy for later comparison
accuracy_ensemble_stacking = best_final_estimator_accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
# after testing all the final_estimators, display the best one
print(f"After checking each final_estimator, the best final_estimator is {best_final_estimator_name}, with accuracy of {best_final_estimator_accuracy}")
Testing hyperparameter optimization with weak base estimators [('lr', LogisticRegression(C=100, penalty='l1', random_state=42, solver='liblinear')), ('nb', BernoulliNB(alpha=0.0001)), ('svm', SVC(C=10, probability=True, random_state=42)), ('knn', KNeighborsClassifier(n_neighbors=10, weights='distance')), ('mlp', MLPClassifier(alpha=0.01, hidden_layer_sizes=(50, 25), max_iter=300,
random_state=42))] and final_estimator=BernoulliNB
Performing GridSearchCV for final_estimator=BernoulliNB
Fitting model
C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.
Found best_estimator_ StackingClassifier(estimators=[('lr',
LogisticRegression(C=100, penalty='l1',
random_state=42,
solver='liblinear')),
('nb', BernoulliNB(alpha=0.0001)),
('svm',
SVC(C=10, probability=True, random_state=42)),
('knn',
KNeighborsClassifier(n_neighbors=10,
weights='distance')),
('mlp',
MLPClassifier(alpha=0.01,
hidden_layer_sizes=(50, 25),
max_iter=300, random_state=42))],
final_estimator=BernoulliNB(alpha=0.1))
C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.
Cross validation scores: [0.64542125 0.65054945 0.66593407 0.66373626 0.66080586 0.63369963 0.65494505 0.64322344 0.65641026 0.65567766] Mean cross validation score: 0.653040293040293 Standard Deviation cross validation score: 0.009474088299092858 Final Accuracy on Test Set: 0.7259540281187234 The best final_estimator so far is BernoulliNB, with accuracy of 0.7259540281187234
Confusion matrix
[[ 539 1180]
[ 48 2714]]
True Negatives (TN) = 539
False Positives (FP) = 1180
False Negatives (FN) = 48
True Positives (TP) = 2714
Accuracy: 0.7259540281187234
Sensitivity: 0.9826212889210717
Specificity: 0.3135543920884235
Geometric Mean: 0.5550722663769014
Precision: 0.7818488541165056
Recall: 0.7259540281187234
f1-score: 0.6819941484668166
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.9182 0.3136 0.4675 1719
1 0.6970 0.9826 0.8155 2762
accuracy 0.7260 4481
macro avg 0.8076 0.6481 0.6415 4481
weighted avg 0.7818 0.7260 0.6820 4481
Current Time: 2024-01-03 20:05:04
The entire notebook runtime so far is 600 minutes
Testing hyperparameter optimization with weak base estimators [('lr', LogisticRegression(C=100, penalty='l1', random_state=42, solver='liblinear')), ('nb', BernoulliNB(alpha=0.0001)), ('svm', SVC(C=10, probability=True, random_state=42)), ('knn', KNeighborsClassifier(n_neighbors=10, weights='distance')), ('mlp', MLPClassifier(alpha=0.01, hidden_layer_sizes=(50, 25), max_iter=300,
random_state=42))] and final_estimator=SVC
Performing GridSearchCV for final_estimator=SVC
Fitting model
C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.
Found best_estimator_ StackingClassifier(estimators=[('lr',
LogisticRegression(C=100, penalty='l1',
random_state=42,
solver='liblinear')),
('nb', BernoulliNB(alpha=0.0001)),
('svm',
SVC(C=10, probability=True, random_state=42)),
('knn',
KNeighborsClassifier(n_neighbors=10,
weights='distance')),
('mlp',
MLPClassifier(alpha=0.01,
hidden_layer_sizes=(50, 25),
max_iter=300, random_state=42))],
final_estimator=SVC(C=10))
C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.
Cross validation scores: [0.9025641 0.89010989 0.9018315 0.91794872 0.88278388 0.8952381 0.89157509 0.89377289 0.89084249 0.90915751] Mean cross validation score: 0.8975824175824176 Standard Deviation cross validation score: 0.00983869419147225 Final Accuracy on Test Set: 0.8910957375585806 The best final_estimator so far is SVC, with accuracy of 0.8910957375585806
Confusion matrix
[[1652 67]
[ 421 2341]]
True Negatives (TN) = 1652
False Positives (FP) = 67
False Negatives (FN) = 421
True Positives (TP) = 2341
Accuracy: 0.8910957375585806
Sensitivity: 0.8475742215785662
Specificity: 0.9610238510762071
Geometric Mean: 0.902518167404043
Precision: 0.9049415847029011
Recall: 0.8910957375585806
f1-score: 0.8924506414504674
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7969 0.9610 0.8713 1719
1 0.9722 0.8476 0.9056 2762
accuracy 0.8911 4481
macro avg 0.8845 0.9043 0.8885 4481
weighted avg 0.9049 0.8911 0.8925 4481
Current Time: 2024-01-03 21:17:04
The entire notebook runtime so far is 672 minutes
Testing hyperparameter optimization with weak base estimators [('lr', LogisticRegression(C=100, penalty='l1', random_state=42, solver='liblinear')), ('nb', BernoulliNB(alpha=0.0001)), ('svm', SVC(C=10, probability=True, random_state=42)), ('knn', KNeighborsClassifier(n_neighbors=10, weights='distance')), ('mlp', MLPClassifier(alpha=0.01, hidden_layer_sizes=(50, 25), max_iter=300,
random_state=42))] and final_estimator=KNN
Performing GridSearchCV for final_estimator=KNN
Fitting model
C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.
Found best_estimator_ StackingClassifier(estimators=[('lr',
LogisticRegression(C=100, penalty='l1',
random_state=42,
solver='liblinear')),
('nb', BernoulliNB(alpha=0.0001)),
('svm',
SVC(C=10, probability=True, random_state=42)),
('knn',
KNeighborsClassifier(n_neighbors=10,
weights='distance')),
('mlp',
MLPClassifier(alpha=0.01,
hidden_layer_sizes=(50, 25),
max_iter=300, random_state=42))],
final_estimator=KNeighborsClassifier(n_neighbors=30))
C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.
Cross validation scores: [0.90769231 0.89010989 0.9032967 0.92087912 0.88424908 0.89450549 0.88571429 0.8981685 0.89230769 0.90915751] Mean cross validation score: 0.8986080586080586 Standard Deviation cross validation score: 0.011014378534595023 Final Accuracy on Test Set: 0.8913189020307967 The best final_estimator so far is KNN, with accuracy of 0.8913189020307967
Confusion matrix
[[1642 77]
[ 410 2352]]
True Negatives (TN) = 1642
False Positives (FP) = 77
False Negatives (FN) = 410
True Positives (TP) = 2352
Accuracy: 0.8913189020307967
Sensitivity: 0.8515568428674873
Specificity: 0.9552065154159395
Geometric Mean: 0.9018939208987115
Precision: 0.9038114018642893
Recall: 0.8913189020307967
f1-score: 0.8926315523189773
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.8002 0.9552 0.8709 1719
1 0.9683 0.8516 0.9062 2762
accuracy 0.8913 4481
macro avg 0.8842 0.9034 0.8885 4481
weighted avg 0.9038 0.8913 0.8926 4481
Current Time: 2024-01-03 22:25:01
The entire notebook runtime so far is 740 minutes
Testing hyperparameter optimization with weak base estimators [('lr', LogisticRegression(C=100, penalty='l1', random_state=42, solver='liblinear')), ('nb', BernoulliNB(alpha=0.0001)), ('svm', SVC(C=10, probability=True, random_state=42)), ('knn', KNeighborsClassifier(n_neighbors=10, weights='distance')), ('mlp', MLPClassifier(alpha=0.01, hidden_layer_sizes=(50, 25), max_iter=300,
random_state=42))] and final_estimator=MLPClassifier
Performing GridSearchCV for final_estimator=MLPClassifier
Fitting model
C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.
Found best_estimator_ StackingClassifier(estimators=[('lr',
LogisticRegression(C=100, penalty='l1',
random_state=42,
solver='liblinear')),
('nb', BernoulliNB(alpha=0.0001)),
('svm',
SVC(C=10, probability=True, random_state=42)),
('knn',
KNeighborsClassifier(n_neighbors=10,
weights='distance')),
('mlp',
MLPClassifier(alpha=0.01,
hidden_layer_sizes=(50, 25),
max_iter=300, random_state=42))],
final_estimator=MLPClassifier(alpha=0.01,
hidden_layer_sizes=(150, 100),
max_iter=500))
C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.
Cross validation scores: [0.9047619 0.89157509 0.8989011 0.91941392 0.88791209 0.8981685 0.88791209 0.8974359 0.88937729 0.90622711] Mean cross validation score: 0.8981684981684982 Standard Deviation cross validation score: 0.009444584538891325 Final Accuracy on Test Set: 0.8868556125864763 This is not the best base classifier
Confusion matrix
[[1650 69]
[ 438 2324]]
True Negatives (TN) = 1650
False Positives (FP) = 69
False Negatives (FN) = 438
True Positives (TP) = 2324
Accuracy: 0.8868556125864763
Sensitivity: 0.8414192614047792
Specificity: 0.9598603839441536
Geometric Mean: 0.8986907228351685
Precision: 0.9017552755443545
Recall: 0.8868556125864763
f1-score: 0.8882894758291611
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7902 0.9599 0.8668 1719
1 0.9712 0.8414 0.9016 2762
accuracy 0.8869 4481
macro avg 0.8807 0.9006 0.8842 4481
weighted avg 0.9018 0.8869 0.8883 4481
Current Time: 2024-01-04 03:28:43
The entire notebook runtime so far is 1044 minutes
Testing hyperparameter optimization with weak base estimators [('lr', LogisticRegression(C=100, penalty='l1', random_state=42, solver='liblinear')), ('nb', BernoulliNB(alpha=0.0001)), ('svm', SVC(C=10, probability=True, random_state=42)), ('knn', KNeighborsClassifier(n_neighbors=10, weights='distance')), ('mlp', MLPClassifier(alpha=0.01, hidden_layer_sizes=(50, 25), max_iter=300,
random_state=42))] and final_estimator=LogisticRegression
Performing GridSearchCV for final_estimator=LogisticRegression
Fitting model
C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.
Found best_estimator_ StackingClassifier(estimators=[('lr',
LogisticRegression(C=100, penalty='l1',
random_state=42,
solver='liblinear')),
('nb', BernoulliNB(alpha=0.0001)),
('svm',
SVC(C=10, probability=True, random_state=42)),
('knn',
KNeighborsClassifier(n_neighbors=10,
weights='distance')),
('mlp',
MLPClassifier(alpha=0.01,
hidden_layer_sizes=(50, 25),
max_iter=300, random_state=42))],
final_estimator=LogisticRegression(C=1))
C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.
Cross validation scores: [0.9032967 0.89377289 0.9010989 0.91575092 0.88717949 0.88791209 0.88644689 0.8959707 0.88424908 0.8996337 ] Mean cross validation score: 0.8955311355311355 Standard Deviation cross validation score: 0.0092621131893211 Final Accuracy on Test Set: 0.893550546752957 The best final_estimator so far is LogisticRegression, with accuracy of 0.893550546752957
Confusion matrix
[[1613 106]
[ 371 2391]]
True Negatives (TN) = 1613
False Positives (FP) = 106
False Negatives (FN) = 371
True Positives (TP) = 2391
Accuracy: 0.893550546752957
Sensitivity: 0.8656770456191166
Specificity: 0.9383362420011635
Geometric Mean: 0.9012747337925943
Precision: 0.9020987351154506
Recall: 0.893550546752957
f1-score: 0.8946775158912602
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.8130 0.9383 0.8712 1719
1 0.9575 0.8657 0.9093 2762
accuracy 0.8936 4481
macro avg 0.8853 0.9020 0.8902 4481
weighted avg 0.9021 0.8936 0.8947 4481
Current Time: 2024-01-04 07:10:23
The entire notebook runtime so far is 1266 minutes
After checking each final_estimator, the best final_estimator is LogisticRegression, with accuracy of 0.893550546752957
# Bagging can only use a single base classifier
# Use a for loop to test all the base classifiers with bagging, one base classifier at a time, keeping track of the best_final_estimator_name
best_base_classifier_name = "none"
best_base_classifier_accuracy = 0 #initialize value to keep track of the accuracy level of each base classifier
base_classifiers = [lr_clf, dt_clf, rf_clf, nb_clf, svm_clf, knn_clf, mlp_clf, gb_clf, xgb_clf] #xgb_clf causing error?
base_classifiers = [lr_clf, dt_clf, rf_clf, nb_clf, svm_clf, knn_clf, mlp_clf, gb_clf] # all classifiers
base_classifiers = [dt_clf, rf_clf, gb_clf] # strong learners
base_classifiers = [lr_clf, nb_clf, svm_clf, knn_clf, mlp_clf] # weak learners
for base_classifier in base_classifiers:
print("\n")
print(f"------------------------------------")
print(f"Base classifier is {base_classifier}")
print(f"------------------------------------")
# Define the BaggingClassifier
clf = BaggingClassifier(base_classifier, n_estimators=50, random_state=42)
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict on the test set
y_pred = clf.predict(X_test)
# Evaluate the accuracy
accuracy = accuracy_score(y_test_label, y_pred)
print(f"Accuracy: {accuracy}")
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# of all the base_classifiers, check to see if this base_classifier provides the best accuracy
if (accuracy > best_base_classifier_accuracy):
best_base_classifier_name = base_classifier #save the name of the base_classifier that is currently the best
best_base_classifier_accuracy = accuracy #save the accuracy of the final estimator that is currently the best
print(f"The best base_classifier so far is {best_base_classifier_name}, with accuracy of {best_base_classifier_accuracy}")
else:
print(f"This is not the best base classifier")
# save accuracy for later comparison
accuracy_ensemble_bagging = best_base_classifier_accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
# after testing all the final_estimators, display the best one
print(f"After checking each base_classifier, the best base_classifier is {best_base_classifier_name}, with accuracy of {best_base_classifier_accuracy}")
------------------------------------ Base classifier is LogisticRegression(C=100, penalty='l1', random_state=42, solver='liblinear') ------------------------------------ Accuracy: 0.8520419549207766 Accuracy: 0.8520419549207766 The best base_classifier so far is LogisticRegression(C=100, penalty='l1', random_state=42, solver='liblinear'), with accuracy of 0.8520419549207766
Confusion matrix
[[1616 103]
[ 560 2202]]
True Negatives (TN) = 1616
False Positives (FP) = 103
False Negatives (FN) = 560
True Positives (TP) = 2202
Accuracy: 0.8520419549207766
Sensitivity: 0.7972483707458363
Specificity: 0.9400814426992438
Geometric Mean: 0.865724204675119
Precision: 0.8737310948411419
Recall: 0.8520419549207766
f1-score: 0.8540496328016558
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7426 0.9401 0.8298 1719
1 0.9553 0.7972 0.8692 2762
accuracy 0.8520 4481
macro avg 0.8490 0.8687 0.8495 4481
weighted avg 0.8737 0.8520 0.8540 4481
Current Time: 2024-01-04 07:11:45
The entire notebook runtime so far is 1267 minutes
------------------------------------
Base classifier is BernoulliNB(alpha=0.0001)
------------------------------------
Accuracy: 0.7728185672840884
Accuracy: 0.7728185672840884
This is not the best base classifier
Confusion matrix
[[1467 252]
[ 766 1996]]
True Negatives (TN) = 1467
False Positives (FP) = 252
False Negatives (FN) = 766
True Positives (TP) = 1996
Accuracy: 0.7728185672840884
Sensitivity: 0.722664735698769
Specificity: 0.8534031413612565
Geometric Mean: 0.7853179964806178
Precision: 0.7993085087682478
Recall: 0.7728185672840884
f1-score: 0.775938440626677
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.6570 0.8534 0.7424 1719
1 0.8879 0.7227 0.7968 2762
accuracy 0.7728 4481
macro avg 0.7724 0.7880 0.7696 4481
weighted avg 0.7993 0.7728 0.7759 4481
Current Time: 2024-01-04 07:11:46
The entire notebook runtime so far is 1267 minutes
------------------------------------
Base classifier is SVC(C=10, probability=True, random_state=42)
------------------------------------
Accuracy: 0.8823923231421558
Accuracy: 0.8823923231421558
The best base_classifier so far is SVC(C=10, probability=True, random_state=42), with accuracy of 0.8823923231421558
Confusion matrix
[[1649 70]
[ 457 2305]]
True Negatives (TN) = 1649
False Positives (FP) = 70
False Negatives (FN) = 457
True Positives (TP) = 2305
Accuracy: 0.8823923231421558
Sensitivity: 0.83454018826937
Specificity: 0.9592786503781268
Geometric Mean: 0.8947382776484692
Precision: 0.898587885893586
Recall: 0.8823923231421558
f1-score: 0.8839118538129781
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7830 0.9593 0.8622 1719
1 0.9705 0.8345 0.8974 2762
accuracy 0.8824 4481
macro avg 0.8768 0.8969 0.8798 4481
weighted avg 0.8986 0.8824 0.8839 4481
Current Time: 2024-01-04 07:21:08
The entire notebook runtime so far is 1276 minutes
------------------------------------
Base classifier is KNeighborsClassifier(n_neighbors=10, weights='distance')
------------------------------------
Accuracy: 0.8491408167819683
Accuracy: 0.8491408167819683
This is not the best base classifier
Confusion matrix
[[1642 77]
[ 599 2163]]
True Negatives (TN) = 1642
False Positives (FP) = 77
False Negatives (FN) = 599
True Positives (TP) = 2163
Accuracy: 0.8491408167819683
Sensitivity: 0.7831281679942071
Specificity: 0.9552065154159395
Geometric Mean: 0.8648983341837438
Precision: 0.8762736787363693
Recall: 0.8491408167819683
f1-score: 0.8512121077194491
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7327 0.9552 0.8293 1719
1 0.9656 0.7831 0.8649 2762
accuracy 0.8491 4481
macro avg 0.8492 0.8692 0.8471 4481
weighted avg 0.8763 0.8491 0.8512 4481
Current Time: 2024-01-04 07:21:18
The entire notebook runtime so far is 1276 minutes
------------------------------------
Base classifier is MLPClassifier(alpha=0.01, hidden_layer_sizes=(50, 25), max_iter=300,
random_state=42)
------------------------------------
C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.
Accuracy: 0.8928810533363088
Accuracy: 0.8928810533363088
The best base_classifier so far is MLPClassifier(alpha=0.01, hidden_layer_sizes=(50, 25), max_iter=300,
random_state=42), with accuracy of 0.8928810533363088
Confusion matrix
[[1630 89]
[ 391 2371]]
True Negatives (TN) = 1630
False Positives (FP) = 89
False Negatives (FN) = 391
True Positives (TP) = 2371
Accuracy: 0.8928810533363088
Sensitivity: 0.8584359160028965
Specificity: 0.9482257126236184
Geometric Mean: 0.902214502318354
Precision: 0.9034816993691006
Recall: 0.8928810533363088
f1-score: 0.8941084444501227
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.8065 0.9482 0.8717 1719
1 0.9638 0.8584 0.9081 2762
accuracy 0.8929 4481
macro avg 0.8852 0.9033 0.8899 4481
weighted avg 0.9035 0.8929 0.8941 4481
Current Time: 2024-01-04 07:31:59
The entire notebook runtime so far is 1287 minutes
After checking each base_classifier, the best base_classifier is MLPClassifier(alpha=0.01, hidden_layer_sizes=(50, 25), max_iter=300,
random_state=42), with accuracy of 0.8928810533363088
# call previously defined function to create confusion matrix
#cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
#model_classification_report(cm, y_test_label, y_pred)
# HINT: in sklearn.ensemble.BaggingClassifier version 1.2.0, the "base_estimator" parameter was renamed to "estimator"
# The base_estimator parameter is deprecated in sklearn version 1.2.0, and will be removed in version 1.4.0
# https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingClassifier.html
# Check to see if this version of BaggingClassifer() expects to have a "base_estimator" or "estimator" parameter
# Print the version of scikit-learn
print("Currently installed scikit-learn version is:", sklearn.__version__)
# Create an instance of the BaggingClassifier model
clf = BaggingClassifier()
# Figure out which parameters exist
default_params = clf.get_params()
print(f"Default parameters are {default_params}")
# Check to see if the base_estimator parameter exists in the BaggingClassifier, which would indicate an outdated version of scikit-learn
desired_parameter1 = 'base_estimator' # Replace with the parameter you want to check
desired_parameter2 = 'estimator' # Replace with the parameter you want to check
# This if block will only be executed if the scikit-learn package is older than 1.2
if (desired_parameter1 in clf.get_params()) and not (desired_parameter2 in clf.get_params()) :
print('\n')
print(f"WARNING: the '{desired_parameter1}' parameter exists, but the '{desired_parameter2}' parameter does not exist the BaggingClassifier.")
print(f"The parameter 'base_estimator' was deprecated in favor of 'estimator' in sklearn 1.2.0, will be removed entirely in sklearn 1.4.0.")
print(f"Your currently installed version of scikit-learn is", sklearn.__version__)
print(f"You may wish to update your installed version of scikit-learn to a minimum of 1.2.0 so you can use the 'estimator__' parameter in the next cell.")
print(f"If you are unable to update your installed version of scikit-learn, you will need to change 'estimator__' to 'base_estimator__' in the following cell for compatibility with your version of scikit-learn.")
print(f"If you are using Anaconda Navigator, you can upgrade with: conda update conda, conda update scikit-learn")
print(f"If you are not using Anaconda Navigator, you can upgrade with: pip install --upgrade scikit-learn")
Currently installed scikit-learn version is: 1.3.0
Default parameters are {'base_estimator': 'deprecated', 'bootstrap': True, 'bootstrap_features': False, 'estimator': None, 'max_features': 1.0, 'max_samples': 1.0, 'n_estimators': 10, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}
# Try different weak learners with different BaggingClassifier parameters, keeping track of which base_estimator provides the best accuracy
best_base_estimator_name = "none"
best_base_estimator_accuracy = 0 #initialize value to keep track of the accuracy level of each base classifier
base_estimators = ['lr', 'nb', 'svm', 'mlp', 'knn'] # weak learners
for base_estimator in base_estimators:
print("\n")
print(f"------------------------------------")
print(f"Base estimator is {base_estimator}")
print(f"------------------------------------")
if (base_estimator == 'lr'):
clf = BaggingClassifier(LogisticRegression(), random_state=42) # Define the BaggingClassifier
param_grid = {'estimator__penalty': [best_params_lr['penalty']], # optimized hyperparameter from base_estimator
'estimator__C': [best_params_lr['C']], # optimized hyperparameter from base_estimator
'estimator__solver': [best_params_lr['solver']], # optimized hyperparameter from base_estimator
'estimator__max_iter': [best_params_lr['max_iter']], # optimized hyperparameter from base_estimator
'n_estimators': [100], # Number of base estimators
'max_samples': [1.0], # The proportion of samples to draw from X to train each base estimator
'max_features': [1.0] # The proportion of features to draw from X to train each base estimator
}
if (base_estimator == 'nb'):
clf = BaggingClassifier(BernoulliNB(), random_state=42) # Define the BaggingClassifier
param_grid = {'estimator__alpha': [best_params_nb['alpha']], # optimized hyperparameter from base_estimator
'n_estimators': [50, 100, 200], # Number of base estimators
'max_samples': [0.5, 0.7, 1.0], # The proportion of samples to draw from X to train each base estimator
'max_features': [0.5, 0.7, 1.0] # The proportion of features to draw from X to train each base estimator
}
if (base_estimator == 'svm'):
clf = BaggingClassifier(SVC(), random_state=42) # Define the BaggingClassifier
param_grid = {'estimator__C': [best_params_svm['C']], # optimized hyperparameter from base_estimator
'estimator__kernel': [best_params_svm['kernel']], # optimized hyperparameter from base_estimator
'n_estimators': [200], # Number of base estimators
'max_samples': [1.0], # The proportion of samples to draw from X to train each base estimator
'max_features': [1.0] # The proportion of features to draw from X to train each base estimator
}
if (base_estimator == 'knn'):
clf = BaggingClassifier(KNeighborsClassifier(), random_state=42) # Define the BaggingClassifier
param_grid = {'estimator__n_neighbors': [best_params_knn['n_neighbors']], # optimized hyperparameter from base_estimator
'estimator__weights': [best_params_knn['weights']], # optimized hyperparameter from base_estimator
'n_estimators': [100], # Number of base estimators
'max_samples': [1.0], # The proportion of samples to draw from X to train each base estimator
'max_features': [0.5] # The proportion of features to draw from X to train each base estimator
}
if (base_estimator == 'mlp'):
clf = BaggingClassifier(MLPClassifier(), random_state=42) # Define the BaggingClassifier
param_grid = {'estimator__hidden_layer_sizes': [best_params_mlp['hidden_layer_sizes']], # optimized hyperparameter from base_estimator
'estimator__max_iter': [best_params_mlp['max_iter']], # optimized hyperparameter from base_estimator
'estimator__alpha': [best_params_mlp['alpha']], # optimized hyperparameter from base_estimator
'n_estimators': [100], # Number of base estimators
'max_samples': [1.0], # The proportion of samples to draw from X to train each base estimator
'max_features': [0.5] # The proportion of features to draw from X to train each base estimator
}
# Use GridSearchCV for hyperparameter tuning
print(f"Performing GridSearchCV")
grid_search = GridSearchCV(clf, param_grid, cv=cv_count, scoring='accuracy')
print(f"Fitting model")
grid_search.fit(X_train_resampled, y_train_label_resampled)
# Print the best hyperparameters
best_params = grid_search.best_params_
best_scores = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Scores:", best_scores)
# Evaluate the model with the best hyperparameters on the test set
clf = grid_search.best_estimator_
y_pred = clf.predict(X_test)
# final cross validation
cross_val_score_result = cross_val_score(clf, X_train_resampled, y_train_label_resampled, cv=cv_count)
print(f"Cross validation scores: {cross_val_score_result}")
print(f"Mean cross validation score: {cross_val_score_result.mean()}")
print(f"Standard Deviation cross validation score: {cross_val_score_result.std()}")
# Evaluate the accuracy
accuracy = accuracy_score(y_test_label, y_pred)
print(f"Accuracy on Test Set: {accuracy}")
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
# of all the base_classifiers, check to see if this base_classifier provides the best accuracy
if (accuracy > best_base_estimator_accuracy):
best_params_ensemble_bagging = best_params #save best parameters for later comparison
best_base_estimator_name = base_estimator #save the name of the base_classifier that is currently the best
best_base_estimator_accuracy = accuracy #save the accuracy of the final estimator that is currently the best
print(f"The best base_estimator so far is {best_base_estimator_name}, with accuracy of {best_base_estimator_accuracy}")
else:
print(f"This is not the best base estimator")
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
# after testing all the final_estimators, display the best one
print(f"After checking each base_estimator, the best base_estimator is {best_base_estimator_name}, with accuracy of {best_base_estimator_accuracy}, and best_params of {best_params}")
------------------------------------
Base estimator is lr
------------------------------------
Performing GridSearchCV
Fitting model
Best Parameters: {'estimator__C': 100, 'estimator__max_iter': 100, 'estimator__penalty': 'l1', 'estimator__solver': 'liblinear', 'max_features': 1.0, 'max_samples': 1.0, 'n_estimators': 100}
Best Scores: 0.8644688644688646
Cross validation scores: [0.86446886 0.85347985 0.86593407 0.87472527 0.85347985 0.86153846
0.86300366 0.86520147 0.86959707 0.87326007]
Mean cross validation score: 0.8644688644688646
Standard Deviation cross validation score: 0.006809641048276789
Accuracy on Test Set: 0.8524882838652087
Accuracy: 0.8524882838652087
The best base_estimator so far is lr, with accuracy of 0.8524882838652087
Confusion matrix
[[1618 101]
[ 560 2202]]
True Negatives (TN) = 1618
False Positives (FP) = 101
False Negatives (FN) = 560
True Positives (TP) = 2202
Accuracy: 0.8524882838652087
Sensitivity: 0.7972483707458363
Specificity: 0.9412449098312973
Geometric Mean: 0.8662597594462145
Precision: 0.874333117221592
Recall: 0.8524882838652087
f1-score: 0.8544915663446427
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7429 0.9412 0.8304 1719
1 0.9561 0.7972 0.8695 2762
accuracy 0.8525 4481
macro avg 0.8495 0.8692 0.8499 4481
weighted avg 0.8743 0.8525 0.8545 4481
Current Time: 2024-01-04 08:14:15
The entire notebook runtime so far is 1329 minutes
------------------------------------
Base estimator is nb
------------------------------------
Performing GridSearchCV
Fitting model
Best Parameters: {'estimator__alpha': 0.0001, 'max_features': 0.7, 'max_samples': 1.0, 'n_estimators': 200}
Best Scores: 0.7874725274725275
Cross validation scores: [0.78315018 0.76556777 0.78901099 0.81098901 0.7978022 0.78095238
0.796337 0.79267399 0.76996337 0.78827839]
Mean cross validation score: 0.7874725274725275
Standard Deviation cross validation score: 0.012712047718623184
Accuracy on Test Set: 0.7741575541173845
Accuracy: 0.7741575541173845
This is not the best base estimator
Confusion matrix
[[1468 251]
[ 761 2001]]
True Negatives (TN) = 1468
False Positives (FP) = 251
False Negatives (FN) = 761
True Positives (TP) = 2001
Accuracy: 0.7741575541173845
Sensitivity: 0.724475018102824
Specificity: 0.8539848749272833
Geometric Mean: 0.7865689465790532
Precision: 0.8003293087184692
Recall: 0.7741575541173845
f1-score: 0.7772588376998616
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.6586 0.8540 0.7437 1719
1 0.8885 0.7245 0.7982 2762
accuracy 0.7742 4481
macro avg 0.7736 0.7892 0.7709 4481
weighted avg 0.8003 0.7742 0.7773 4481
Current Time: 2024-01-04 08:19:36
The entire notebook runtime so far is 1335 minutes
------------------------------------
Base estimator is svm
------------------------------------
Performing GridSearchCV
Fitting model
Best Parameters: {'estimator__C': 10, 'estimator__kernel': 'rbf', 'max_features': 1.0, 'max_samples': 1.0, 'n_estimators': 200}
Best Scores: 0.8911355311355311
Cross validation scores: [0.9025641 0.88058608 0.89084249 0.90695971 0.88498168 0.88571429
0.88644689 0.89157509 0.88498168 0.8967033 ]
Mean cross validation score: 0.8911355311355311
Standard Deviation cross validation score: 0.008066596107546274
Accuracy on Test Set: 0.8768132113367552
Accuracy: 0.8768132113367552
The best base_estimator so far is svm, with accuracy of 0.8768132113367552
Confusion matrix
[[1666 53]
[ 499 2263]]
True Negatives (TN) = 1666
False Positives (FP) = 53
False Negatives (FN) = 499
True Positives (TP) = 2263
Accuracy: 0.8768132113367552
Sensitivity: 0.8193338160753078
Specificity: 0.9691681210005817
Geometric Mean: 0.8911072971297801
Precision: 0.8974759907435244
Recall: 0.8768132113367552
f1-score: 0.8784762463998154
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7695 0.9692 0.8579 1719
1 0.9771 0.8193 0.8913 2762
accuracy 0.8768 4481
macro avg 0.8733 0.8943 0.8746 4481
weighted avg 0.8975 0.8768 0.8785 4481
Current Time: 2024-01-04 10:15:10
The entire notebook runtime so far is 1450 minutes
------------------------------------
Base estimator is mlp
------------------------------------
Performing GridSearchCV
Fitting model
C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.
Best Parameters: {'estimator__alpha': 0.01, 'estimator__hidden_layer_sizes': (50, 25), 'estimator__max_iter': 300, 'max_features': 0.5, 'max_samples': 1.0, 'n_estimators': 100}
Best Scores: 0.898021978021978
C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet. C:\Users\njeffrey\anaconda3\lib\site-packages\sklearn\neural_network\_multilayer_perceptron.py:691: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (300) reached and the optimization hasn't converged yet.
Cross validation scores: [0.9010989 0.88937729 0.9040293 0.91428571 0.89304029 0.8996337 0.88498168 0.9047619 0.88937729 0.8996337 ] Mean cross validation score: 0.898021978021978 Standard Deviation cross validation score: 0.00839651210660479 Accuracy on Test Set: 0.8881945994197724 Accuracy: 0.8881945994197724 The best base_estimator so far is mlp, with accuracy of 0.8881945994197724
Confusion matrix
[[1648 71]
[ 430 2332]]
True Negatives (TN) = 1648
False Positives (FP) = 71
False Negatives (FN) = 430
True Positives (TP) = 2332
Accuracy: 0.8881945994197724
Sensitivity: 0.8443157132512672
Specificity: 0.9586969168121
Geometric Mean: 0.8996904307093629
Precision: 0.90240585114681
Recall: 0.8881945994197724
f1-score: 0.8895945168123885
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7931 0.9587 0.8681 1719
1 0.9705 0.8443 0.9030 2762
accuracy 0.8882 4481
macro avg 0.8818 0.9015 0.8855 4481
weighted avg 0.9024 0.8882 0.8896 4481
Current Time: 2024-01-04 17:01:52
The entire notebook runtime so far is 1857 minutes
------------------------------------
Base estimator is knn
------------------------------------
Performing GridSearchCV
Fitting model
Best Parameters: {'estimator__n_neighbors': 10, 'estimator__weights': 'distance', 'max_features': 0.5, 'max_samples': 1.0, 'n_estimators': 100}
Best Scores: 0.8724542124542125
Cross validation scores: [0.88058608 0.86373626 0.86813187 0.89450549 0.86886447 0.87106227
0.86153846 0.86739927 0.86520147 0.88351648]
Mean cross validation score: 0.8724542124542125
Standard Deviation cross validation score: 0.009907460022217087
Accuracy on Test Set: 0.8489176523097522
Accuracy: 0.8489176523097522
This is not the best base estimator
Confusion matrix
[[1673 46]
[ 631 2131]]
True Negatives (TN) = 1673
False Positives (FP) = 46
False Negatives (FN) = 631
True Positives (TP) = 2131
Accuracy: 0.8489176523097522
Sensitivity: 0.7715423606082549
Specificity: 0.9732402559627691
Geometric Mean: 0.8665426039869575
Precision: 0.8819133667567919
Recall: 0.8489176523097522
f1-score: 0.8509549102076678
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.7261 0.9732 0.8317 1719
1 0.9789 0.7715 0.8629 2762
accuracy 0.8489 4481
macro avg 0.8525 0.8724 0.8473 4481
weighted avg 0.8819 0.8489 0.8510 4481
Current Time: 2024-01-04 17:04:39
The entire notebook runtime so far is 1860 minutes
After checking each base_estimator, the best base_estimator is mlp, with accuracy of 0.8881945994197724, and best_params of {'estimator__n_neighbors': 10, 'estimator__weights': 'distance', 'max_features': 0.5, 'max_samples': 1.0, 'n_estimators': 100}
In this example:
LR, SVC, KNN, NB, MLP are individual base classifiers.
An AdaBoostClassifier is created with these base classifiers.
The AdaBoost classifier is trained on the training set.
Predictions are made on the test set, and the performance of the AdaBoost classifier is evaluated.
You can adjust the parameters such as n_estimators and learning_rate based on your specific needs. Note that AdaBoost works best with weak learners, so base classifiers like decision trees with limited depth are commonly used.
The AdaBoostClassifier can use different base classifiers (weak learners) as its base estimator. The base_estimator parameter of the AdaBoostClassifier allows you to specify the type of weak learner to use. If not specified, the default is a decision stump (DecisionTreeClassifier(max_depth=1)).
Using RandomForestClassifier as a base estimator for AdaBoostClassifier is generally not a common practice because AdaBoost is typically used with weak learners, and Random Forests are already ensemble methods that use multiple decision trees.
However, if you still want to experiment with this combination, you can specify RandomForestClassifier as a base_estimator in AdaBoostClassifier.
Keep in mind that using RandomForestClassifier as a base estimator for AdaBoost might not provide significant advantages, as Random Forests are already powerful ensemble models. AdaBoost is often more beneficial when combined with weak learners like shallow decision trees (stumps). It's recommended to experiment with different combinations and evaluate their performance on your specific dataset.
# HINT: in sklearn.ensemble.AdaBoostClassifier version 1.2.0, the "base_estimator" parameter was renamed to "estimator"
# The base_estimator parameter is deprecated in sklearn version 1.2.0, and will be removed in version 1.4.0
# https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.BaggingClassifier.html
# Check to see if this version of AdaBoostClassifer() expects to have a "base_estimator" or "estimator" parameter
# Print the version of scikit-learn
print("Currently installed scikit-learn version is:", sklearn.__version__)
# Create an instance of the BaggingClassifier model
clf = AdaBoostClassifier()
# Figure out which parameters exist
default_params = clf.get_params()
print(f"Default parameters are {default_params}")
# Check to see if the base_estimator parameter exists in the BaggingClassifier, which would indicate an outdated version of scikit-learn
desired_parameter1 = 'base_estimator' # Replace with the parameter you want to check
desired_parameter2 = 'estimator' # Replace with the parameter you want to check
# This if block will only be executed if the scikit-learn package is older than 1.2
if (desired_parameter1 in clf.get_params()) and not (desired_parameter2 in clf.get_params()) :
print('\n')
print(f"WARNING: the '{desired_parameter1}' parameter exists, but the '{desired_parameter2}' parameter does not exist the AdaBoostClassifier.")
print(f"The parameter 'base_estimator' was deprecated in favor of 'estimator' in sklearn 1.2.0, will be removed entirely in sklearn 1.4.0.")
print(f"Your currently installed version of scikit-learn is", sklearn.__version__)
print(f"You may wish to update your installed version of scikit-learn to a minimum of 1.2.0 so you can use the 'estimator__' parameter in the next cell.")
print(f"If you are unable to update your installed version of scikit-learn, you will need to change 'estimator__' to 'base_estimator__' in the following cell for compatibility with your version of scikit-learn.")
print(f"If you are using Anaconda Navigator, you can upgrade with: conda update conda, conda update scikit-learn")
print(f"If you are not using Anaconda Navigator, you can upgrade with: pip install --upgrade scikit-learn")
Currently installed scikit-learn version is: 1.3.0
Default parameters are {'algorithm': 'SAMME.R', 'base_estimator': 'deprecated', 'estimator': None, 'learning_rate': 1.0, 'n_estimators': 50, 'random_state': None}
# AdaBoostClassifier with multiple base classifiers
# Define multiple base classifiers
base_classifiers = [
#DecisionTreeClassifier(max_depth=1), # Decision stump
LogisticRegression(C=100, max_iter=100, penalty='l2', solver='liblinear'),
BernoulliNB(alpha=0.1),
SVC(kernel='linear', C=0.1), # Support Vector Machine with linear kernel
KNeighborsClassifier(n_neighbors=10, weights='uniform'),
MLPClassifier(hidden_layer_sizes=[50,25], max_iter=800)
]
# Create the AdaBoostClassifier, setting estimator=None because we will add multiple base_classifiers in the next step
clf = AdaBoostClassifier(estimator=None, n_estimators=50, random_state=42)
# Set the base classifiers as the base estimator
clf.estimator_ = base_classifiers
# Fit the model to the training data
clf.fit(X_train_resampled, y_train_label_resampled)
# Predict on the test set
y_pred = clf.predict(X_test)
# Evaluate the accuracy
accuracy = accuracy_score(y_test_label, y_pred)
print(f"Accuracy: {accuracy}")
accuracy = clf.score(X_test, y_test_label)
print("Accuracy:", accuracy)
print('\n')
# save accuracy for later comparison
accuracy_ensemble_boosting = accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Accuracy: 0.9290336978353047 Accuracy: 0.9290336978353047
Confusion matrix
[[1641 78]
[ 240 2522]]
True Negatives (TN) = 1641
False Positives (FP) = 78
False Negatives (FN) = 240
True Positives (TP) = 2522
Accuracy: 0.9290336978353047
Sensitivity: 0.9131064446053584
Specificity: 0.9546247818499127
Geometric Mean: 0.93363485393763
Precision: 0.9325618961078621
Recall: 0.9290336978353047
f1-score: 0.9295584011268598
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.8724 0.9546 0.9117 1719
1 0.9700 0.9131 0.9407 2762
accuracy 0.9290 4481
macro avg 0.9212 0.9339 0.9262 4481
weighted avg 0.9326 0.9290 0.9296 4481
Current Time: 2024-01-04 17:04:42
The entire notebook runtime so far is 1860 minutes
print(f"Performing hyperparameter optimization for AdaBoostClassifier")
# Define multiple base classifiers
base_classifiers = [
LogisticRegression(C=100, max_iter=100, penalty='l2', solver='liblinear'),
BernoulliNB(alpha=0.1),
SVC(kernel='linear', C=0.1), # Support Vector Machine with linear kernel
KNeighborsClassifier(n_neighbors=10, weights='uniform'),
MLPClassifier(hidden_layer_sizes=[50,25], max_iter=800)
]
# Define the hyperparameters to tune for AdaBoostClassifier
param_grid = {
'n_estimators': [50, 100, 200], # Number of boosting rounds
'learning_rate': [0.01, 0.1, 1.0] # Weight applied to each classifier
}
# Create the AdaBoostClassifier, setting estimator=None because we will add multiple base_classifiers in the next step
clf = AdaBoostClassifier(estimator=None, random_state=42)
# Set the base classifiers as the base_estimator
clf.estimator_ = base_classifiers
# Use GridSearchCV for hyperparameter tuning
print(f"Performing GridSearchCV")
grid_search = GridSearchCV(clf, param_grid, cv=5, scoring='accuracy')
print(f"Fitting model")
grid_search.fit(X_train_resampled, y_train_label_resampled)
# Validate on Test Set
clf = grid_search.best_estimator_
print(f"Found best_estimator_ {clf}")
y_pred = clf.predict(X_test)
# Perform cross-validation and return both scores and standard deviations
cross_val_score_result = cross_val_score(clf, X_train_resampled, y_train_label_resampled, cv=cv_count)
print(f"Cross validation scores: {cross_val_score_result}")
print(f"Mean cross validation score: {cross_val_score_result.mean()}")
print(f"Standard Deviation cross validation score: {cross_val_score_result.std()}")
# Evaluate the accuracy
accuracy = accuracy_score(y_test_label, y_pred)
print(f"Final Accuracy on Test Set: {accuracy}")
# This method of calculating accuracy generates an error with AdaBoostClassifier
#accuracy = clf.score(X_test, y_test_label)
#print("Accuracy:", accuracy)
#print('\n')
# save accuracy for later comparison
accuracy_ensemble_boosting = accuracy
# call previously defined function to create confusion matrix
cm = visualize_confusion_matrix(y_test_label, y_pred)
# call previously defined function to create report on model precision, recall, f1-score, accuracy
model_classification_report(cm, y_test_label, y_pred)
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Performing hyperparameter optimization for AdaBoostClassifier Performing GridSearchCV Fitting model Found best_estimator_ AdaBoostClassifier(n_estimators=200, random_state=42) Cross validation scores: [0.94432234 0.93626374 0.94285714 0.94285714 0.93113553 0.93626374 0.94212454 0.94139194 0.92600733 0.93113553] Mean cross validation score: 0.9374358974358975 Standard Deviation cross validation score: 0.005971482453425571 Final Accuracy on Test Set: 0.9330506583351931
Confusion matrix
[[1647 72]
[ 228 2534]]
True Negatives (TN) = 1647
False Positives (FP) = 72
False Negatives (FN) = 228
True Positives (TP) = 2534
Accuracy: 0.9330506583351931
Sensitivity: 0.9174511223750905
Specificity: 0.9581151832460733
Geometric Mean: 0.9375627180267597
Precision: 0.9363221482432093
Recall: 0.9330506583351931
f1-score: 0.9335308382044839
------------------------------------------------
Classification Report:
precision recall f1-score support
0 0.8784 0.9581 0.9165 1719
1 0.9724 0.9175 0.9441 2762
accuracy 0.9331 4481
macro avg 0.9254 0.9378 0.9303 4481
weighted avg 0.9363 0.9331 0.9335 4481
Current Time: 2024-01-04 17:10:28
The entire notebook runtime so far is 1866 minutes
print(f"LR accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_lr_undersampled_unoptimized*100:.2f}%")
print(f"LR accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_lr_undersampled_optimized*100:.2f}%")
print('\n')
print(f"DT accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_dt_undersampled_unoptimized*100:.2f}%")
print(f"DT accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_dt_undersampled_optimized*100:.2f}%")
print('\n')
print(f"DS accuracy on undersampled balanced data, without hyperparameter optimimization: {accuracy_ds_undersampled_unoptimized*100:.2f}%")
print(f"DS accuracy on undersampled balanced data, with hyperparameter optimimization: {accuracy_ds_undersampled_optimized*100:.2f}%")
print('\n')
print(f"RF accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_rf_undersampled_unoptimized*100:.2f}%")
print(f"RF accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_rf_undersampled_optimized*100:.2f}%")
print('\n')
print(f"NB accuracy on undersampled balanced data, without hyperparameter optimimization: {accuracy_nb_undersampled_unoptimized*100:.2f}%")
print(f"NB accuracy on undersampled balanced data, with hyperparameter optimimization: {accuracy_nb_undersampled_optimized*100:.2f}%")
print('\n')
print(f"SVM accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_svm_undersampled_unoptimized*100:.2f}%")
print(f"SVM accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_svm_undersampled_optimized*100:.2f}%")
print('\n')
print(f"KNN accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_knn_undersampled_unoptimized*100:.2f}%")
print(f"KNN accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_knn_undersampled_optimized*100:.2f}%")
print('\n')
print(f"MLP accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_mlp_undersampled_unoptimized*100:.2f}%")
print(f"MLP accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_mlp_undersampled_optimized*100:.2f}%")
print('\n')
print(f"GB accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_gb_undersampled_unoptimized*100:.2f}%")
print(f"GB accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_gb_undersampled_optimized*100:.2f}%")
print('\n')
print(f"XGB accuracy on undersampled balanced data, before hyperparameter optimimization: {accuracy_xgb_undersampled_unoptimized*100:.2f}%")
print(f"XGB accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_xgb_undersampled_optimized*100:.2f}%")
print('\n')
print(f"Ensemble voting accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_ensemble_voting*100:.2f}%")
print(f"Ensemble stacking accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_ensemble_stacking*100:.2f}%")
print(f"Ensemble bagging accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_ensemble_bagging*100:.2f}%")
print(f"Ensemble boosting accuracy on undersampled balanced data, after hyperparameter optimimization: {accuracy_ensemble_boosting*100:.2f}%")
LR accuracy on undersampled balanced data, before hyperparameter optimimization: 85.03% LR accuracy on undersampled balanced data, after hyperparameter optimimization: 85.23% DT accuracy on undersampled balanced data, before hyperparameter optimimization: 92.68% DT accuracy on undersampled balanced data, after hyperparameter optimimization: 93.66% DS accuracy on undersampled balanced data, without hyperparameter optimimization: 76.23% DS accuracy on undersampled balanced data, with hyperparameter optimimization: 76.23% RF accuracy on undersampled balanced data, before hyperparameter optimimization: 94.87% RF accuracy on undersampled balanced data, after hyperparameter optimimization: 93.22% NB accuracy on undersampled balanced data, without hyperparameter optimimization: 77.24% NB accuracy on undersampled balanced data, with hyperparameter optimimization: 77.28% SVM accuracy on undersampled balanced data, before hyperparameter optimimization: 85.70% SVM accuracy on undersampled balanced data, after hyperparameter optimimization: 87.61% KNN accuracy on undersampled balanced data, before hyperparameter optimimization: 85.11% KNN accuracy on undersampled balanced data, after hyperparameter optimimization: 85.05% MLP accuracy on undersampled balanced data, before hyperparameter optimimization: 89.20% MLP accuracy on undersampled balanced data, after hyperparameter optimimization: 88.51% GB accuracy on undersampled balanced data, before hyperparameter optimimization: 94.09% GB accuracy on undersampled balanced data, after hyperparameter optimimization: 95.16% XGB accuracy on undersampled balanced data, before hyperparameter optimimization: 95.16% XGB accuracy on undersampled balanced data, after hyperparameter optimimization: 94.93% Ensemble voting accuracy on undersampled balanced data, after hyperparameter optimimization: 84.45% Ensemble stacking accuracy on undersampled balanced data, after hyperparameter optimimization: 89.36% Ensemble bagging accuracy on undersampled balanced data, after hyperparameter optimimization: 89.29% Ensemble boosting accuracy on undersampled balanced data, after hyperparameter optimimization: 93.31%
# show a running total of elapsed time for the entire notebook
show_elapsed_time()
Current Time: 2024-01-04 17:10:28 The entire notebook runtime so far is 1866 minutes